# The Causal Effects of Political Incivility in Social Media Discussions
# Duke Polarization Lab

# Start log file ----
sink("Logs/Incivility-Analysis-log.txt", split = TRUE)
cat("\n==============================\n")
cat("START LOG FILE\n")
cat("==============================\n")
start_time <- Sys.time()
cat("Starting script at", as.character(Sys.time()), "\n")

# Packages ----
# install.packages(c("readxl","tidyverse","vtable","lmerTest","cowplot",
# "here","xtable","RColorBrewer","stargazer","survey","srvyr"))

# Libraries ----
  library(readxl)
  library(tidyverse)
  library(vtable)
  library(lmerTest)
  library(cowplot)
  library(here)
  library(xtable)
  library(RColorBrewer)
  library(stargazer)
  library(survey)
  library(srvyr)
  options(scipen=999)
  
# Working Directory ----
  setwd(here()) # set directory to folder with R Project
  
# Load data ----
  
  # ANES
  anes <- read.csv("Data/Survey Data/anes_timeseries_2024_csv_20250430.csv") # read 2024 ANES time series
  
  # Platform data
  all.bot.comments <- read_xlsx("Data/Platform Data/all-bot-comments-2025-10-12.xlsx") # read all bot comments from SMA
  all.comments <- read_xlsx("Data/Platform Data/all-comments-2025-10-12.xlsx") # read all user comments from SMA
  all.posts <- read_xlsx("Data/Platform Data/all-posts-2025-10-12.xlsx") # read all user posts from SMA
  
  # Survey data
  df.intake <- read_xlsx("Data/Survey Data/civility-intake-attrition.xlsx") # read intake survey data for attrition analysis
  df.pre <- read_xlsx("Data/Survey Data/civility-pre-attrition.xlsx") # read prepped pre-treatment survey data for attrition analysis
  df.post <- read_xlsx("Data/Survey Data/civility-post-attrition.xlsx") # read prepped post-treatment survey data for attrition analysis
  mydata <- read_xlsx("Data/Survey Data/civility-full-ajps.xlsx") # read survey data
  
  # Prolific (anonymized)
  prolific <- read_xlsx("Data/Reference Data/prolific-prepped.xlsx") # read anonymized prolific data

  # Pretesting data
  researcher.posts <- read_xlsx("Data/Pretest Data/TextMeasures-Posts.xlsx") # read researcher-generated posts for pretesting
  researcher.comments <- read_xlsx("Data/Pretest Data/TextMeasures-Comments.xlsx") # read researcher-generated comments for pretesting
  bot.comments <- read_xlsx("Data/Pretest Data/TextMeasures-GPT-Comments.xlsx") # read gpt-generated comments for pretesting
  pt.posts <- read_xlsx("Data/Pretest Data/ConnectMeasures-Posts.xlsx") # read connect pretested researcher-generated posts
  pt.comments <- read_xlsx("Data/Pretest Data/ConnectMeasures-Comments.xlsx") # read connect pretested researcher-generated comments
  pt.gpt <- read_xlsx("Data/Pretest Data/ConnectMeasures-GPT-Comments.xlsx") # read connect pretested gpt comments
  pt.posts.pair <- read_xlsx("Data/Pretest Data/ConnectMeasures-Posts-Pairs.xlsx") # read connect pretested researcher-generated post pairs
  pt.comments.pair <- read_xlsx("Data/Pretest Data/ConnectMeasures-Comments-Pairs.xlsx") # read connect pretested researcher-generated comment pairs
  pt.gpt.pair <- read_xlsx("Data/Pretest Data/ConnectMeasures-GPT-Comments-Pairs.xlsx") # read connect pretested gpt-generated comment pairs

  # Reference frames for merging  
  ids <- mydata %>% # frame with participant_code and condition for later merging
    select(condition, participant_code) 
  pids <- mydata %>% # frame with participant_code and pid for later merging
    select(participant_code, pid7) 
  
############### ----
## MAIN RESULTS ----
############### ----
# Descriptive statistics: bot activity ----
cat("\n==============================\n")
cat("Mean and median bot comments\n")
cat("==============================\n")
mean(mydata$n.bot.comments) # mean number of bot comments per respondent
quantile(mydata$n.bot.comments, probs = c(0.5)) # median number of bot comments per respondent

# Manipulation check ----
cat("\n==============================\n")
cat("Manipulation check\n")
cat("==============================\n")
round(prop.table(table(mydata$manip.correct)),2) # overall correct rate
round(prop.table(table(mydata$manip.correct, mydata$condition), 2),2) # correct rate by condition

# Main models (Tables S8-9) ----

mod.h1 <- lm(scale(comfort.sharing) ~ condition.uncivil, data = mydata) # Comfort sharing opinions
mod.h1.t <- summary(mod.h1)$coefficients[,3] # t values
mod.h1.p <- ifelse(mod.h1.t < 0, pt(mod.h1.t, mod.h1$df.residual,lower=T), 
                   pt(mod.h1.t, mod.h1$df.residual,lower=F)) # one-tailed p

mod.h2a <- lm(n.posts ~ condition.uncivil, data = mydata) # Number of posts
mod.h2a.t <- summary(mod.h2a)$coefficients[,3] # t values
mod.h2a.p <- ifelse(mod.h2a.t < 0, pt(mod.h2a.t, mod.h2a$df.residual,lower=T), 
                    pt(mod.h2a.t, mod.h2a$df.residual,lower=F)) # one-tailed p

mod.h2b <- lm(n.comments ~ condition.uncivil, data = mydata) # Number of comments
mod.h2b.t <- summary(mod.h2b)$coefficients[,3] # t values
mod.h2b.p <- ifelse(mod.h2b.t < 0, pt(mod.h2b.t, mod.h2b$df.residual,lower=T), 
                    pt(mod.h2b.t, mod.h2b$df.residual,lower=F)) # one-tailed p

mod.h2c <- lm(n.comments.outparty ~ condition.uncivil, data = mydata) # Number of comments
mod.h2c.t <- summary(mod.h2c)$coefficients[,3] # t values
mod.h2c.p <- ifelse(mod.h2c.t < 0, pt(mod.h2c.t, mod.h2c$df.residual,lower=T), 
                    pt(mod.h2c.t, mod.h2c$df.residual,lower=F)) # one-tailed p

mod.h3a <- lm(scale(tox.posts) ~ condition.uncivil, data = mydata) # Toxicity of posts
mod.h3a.t <- summary(mod.h3a)$coefficients[,3] # t values
mod.h3a.p <- ifelse(mod.h3a.t < 0, pt(mod.h3a.t, mod.h3a$df.residual,lower=T), 
                    pt(mod.h3a.t, mod.h3a$df.residual,lower=F)) # one-tailed p

mod.h3b <- lm(scale(tox.comments) ~ condition.uncivil, data = mydata) # Toxicity of comments
mod.h3b.t <- summary(mod.h3b)$coefficients[,3] # t values
mod.h3b.p <- ifelse(mod.h3b.t < 0, pt(mod.h3b.t, mod.h3b$df.residual,lower=T), 
                    pt(mod.h3b.t, mod.h3b$df.residual,lower=F)) # one-tailed p

mod.h3c <- lm(scale(tox.comments.outparty) ~ condition.uncivil, data = mydata) # Toxicity of out-party comments
mod.h3c.t <- summary(mod.h3c)$coefficients[,3] # t values
mod.h3c.p <- ifelse(mod.h3c.t < 0, pt(mod.h3c.t, mod.h3c$df.residual,lower=T), 
                    pt(mod.h3c.t, mod.h3c$df.residual,lower=F)) # one-tailed p

mod.h4a <- lm(scale(post.therm.inparty) ~ condition.uncivil + pre.therm.inparty, data = mydata) # change in in-party therm
mod.h4a.t <- summary(mod.h4a)$coefficients[,3] # t values
mod.h4a.p <- ifelse(mod.h4a.t < 0, pt(mod.h4a.t, mod.h4a$df.residual,lower=T), 
                    pt(mod.h4a.t, mod.h4a$df.residual,lower=F)) # one-tailed p

mod.h4b <- lm(scale(post.therm.outparty) ~ condition.uncivil + pre.therm.outparty, data = mydata) # change in out-party therm
mod.h4b.t <- summary(mod.h4b)$coefficients[,3] # t values
mod.h4b.p <- ifelse(mod.h4b.t < 0, pt(mod.h4b.t, mod.h4b$df.residual,lower=T), 
                    pt(mod.h4b.t, mod.h4b$df.residual,lower=F)) # one-tailed p

mod.h5 <- lm(scale(post.clim.polar) ~ condition.uncivil + pre.clim.polar, data = mydata) # change in perceived polarization
mod.h5.t <- summary(mod.h5)$coefficients[,3] # t values
mod.h5.p <- ifelse(mod.h5.t < 0, pt(mod.h5.t, mod.h5$df.residual,lower=T), 
                   pt(mod.h5.t, mod.h5$df.residual,lower=F)) # one-tailed p

mod.h6 <- lm(scale(post.trust) ~ condition.uncivil + pre.trust, data = mydata) # change in trust
mod.h6.t <- summary(mod.h6)$coefficients[,3] # t values
mod.h6.p <- ifelse(mod.h6.t < 0, pt(mod.h6.t, mod.h6$df.residual,lower=T), 
                   pt(mod.h6.t, mod.h6$df.residual,lower=F)) # one-tailed p

mod.h7 <- lm(scale(post.democ2) ~ condition.uncivil + pre.democ2, data = mydata) # change in democratic satisfaction
mod.h7.t <- summary(mod.h7)$coefficients[,3] # t values
mod.h7.p <- ifelse(mod.h7.t < 0, pt(mod.h7.t, mod.h7$df.residual,lower=T), 
                   pt(mod.h7.t, mod.h7$df.residual,lower=F)) # one-tailed p


mods1 <- list(mod.h1, mod.h2a, mod.h2b, mod.h2c, mod.h3a, mod.h3b, mod.h3c) 
cat("\n==============================\n")
cat("Table S8: ATE of assignment to uncivil newsfeed (H1-H3)\n")
cat("==============================\n")
stargazer(mods1, # make latex table for S8
          dep.var.labels.include = T,
          dep.var.labels = c("Comfort sharing", "Count: post", "Count: comments", "Count: Out-party comments", "Toxicity: post", "Toxicity: comments", "Toxicity: Out-party comments"),
          p = list(mod.h1.p, 
                   mod.h2a.p, mod.h2b.p, mod.h2c.p,
                   mod.h3a.p, mod.h3b.p, mod.h3c.p),
          covariate.labels = c("Uncivil newsfeed", "(Constant)"),
          keep.stat = c("n", "adj.rsq"),
          star.char    = c("\\dagger", "*", "**"),
          report = ('vc*s'),
          digits = 3,
          align = T,
          no.space = T,
          out = "Tables/TableS8_ate_h1_h3.tex")

mods2 <- list(mod.h4a, mod.h4b, mod.h5, mod.h6, mod.h7) 
cat("\n==============================\n")
cat("Table S9: ATE of assignment to uncivil newsfeed (H4-H7)\n")
cat("==============================\n")
stargazer(mods2, # make latex table for S9
          dep.var.labels.include = T,
          dep.var.labels = c("In-party thermometer", "Out-party thermometer", "Perceived polarization",
                             "Trust", "Democratic satisfaction"),
          p = list(mod.h4a.p, mod.h4b.p,
                   mod.h5.p,
                   mod.h6.p,
                   mod.h7.p),
          covariate.labels = c("Uncivil newsfeed",
                               "Pretreatment: In-party therm.", "Pretreatment: Out-party therm.",
                               "Pretreatment: Perceived polarization",
                               "Pretreatment: Trust in gov.",
                               "Pretreatment: Democratic satisfaction","(Constant)"),
          keep.stat = c("n", "adj.rsq"),
          star.char    = c("\\dagger", "*", "**"),
          report = ('vc*s'),
          digits = 3,
          align = T,
          no.space = T,
          out = "Tables/TableS9_ate_h4_h7.tex")

# Coefficient plots (Figures 3,6) ----
figure_3 <- as.data.frame(coef(summary(mod.h1)))[2,] %>% # H1
  mutate(var = row.names(as.data.frame(coef(summary(mod.h1)))[2,]),
         stderror = `Std. Error`,
         lowbound = Estimate - (qnorm(0.95)*stderror),
         highbound = Estimate + (qnorm(0.95)*stderror),
         lowbound95 = Estimate - (qnorm(0.975)*stderror),
         highbound95 = Estimate + (qnorm(0.975)*stderror),
         outcome = "Comfort sharing opinions",
         hypothesis = "H1")
add <- as.data.frame(coef(summary(mod.h2a)))[2,] %>% # H2a
  mutate(var = row.names(as.data.frame(coef(summary(mod.h2a)))[2,]),
         stderror = `Std. Error`,
         lowbound = Estimate - (qnorm(0.95)*stderror),
         highbound = Estimate + (qnorm(0.95)*stderror),
         lowbound95 = Estimate - (qnorm(0.975)*stderror),
         highbound95 = Estimate + (qnorm(0.975)*stderror),
         outcome = "Number of posts",
         hypothesis = "H2")
figure_3 <- rbind(figure_3,add)
add <- as.data.frame(coef(summary(mod.h2b)))[2,] %>% # H2b
  mutate(var = row.names(as.data.frame(coef(summary(mod.h2b)))[2,]),
         stderror = `Std. Error`,
         lowbound = Estimate - (qnorm(0.95)*stderror),
         highbound = Estimate + (qnorm(0.95)*stderror),
         lowbound95 = Estimate - (qnorm(0.975)*stderror),
         highbound95 = Estimate + (qnorm(0.975)*stderror),
         outcome = "Number of comments",
         hypothesis = "H2")
figure_3 <- rbind(figure_3,add)
add <- as.data.frame(coef(summary(mod.h2c)))[2,] %>% # H2c
  mutate(var = row.names(as.data.frame(coef(summary(mod.h2c)))[2,]),
         stderror = `Std. Error`,
         lowbound = Estimate - (qnorm(0.95)*stderror),
         highbound = Estimate + (qnorm(0.95)*stderror),
         lowbound95 = Estimate - (qnorm(0.975)*stderror),
         highbound95 = Estimate + (qnorm(0.975)*stderror),
         outcome = "Number of out-party comments",
         hypothesis = "H2")
figure_3 <- rbind(figure_3, add)
add <- as.data.frame(coef(summary(mod.h3a)))[2,] %>% # H3a
  mutate(var = row.names(as.data.frame(coef(summary(mod.h3a)))[2,]),
         stderror = `Std. Error`,
         lowbound = Estimate - (qnorm(0.95)*stderror),
         highbound = Estimate + (qnorm(0.95)*stderror),
         lowbound95 = Estimate - (qnorm(0.975)*stderror),
         highbound95 = Estimate + (qnorm(0.975)*stderror),
         outcome = "Toxicity of posts",
         hypothesis = "H3")
figure_3 <- rbind(figure_3,add)
add <- as.data.frame(coef(summary(mod.h3b)))[2,] %>% # H3b
  mutate(var = row.names(as.data.frame(coef(summary(mod.h3b)))[2,]),
         stderror = `Std. Error`,
         lowbound = Estimate - (qnorm(0.95)*stderror),
         highbound = Estimate + (qnorm(0.95)*stderror),
         lowbound95 = Estimate - (qnorm(0.975)*stderror),
         highbound95 = Estimate + (qnorm(0.975)*stderror),
         outcome = "Toxicity of comments",
         hypothesis = "H3")
figure_3 <- rbind(figure_3,add)
add <- as.data.frame(coef(summary(mod.h3c)))[2,] %>% # H3c
  mutate(var = row.names(as.data.frame(coef(summary(mod.h3c)))[2,]),
         stderror = `Std. Error`,
         lowbound = Estimate - (qnorm(0.95)*stderror),
         highbound = Estimate + (qnorm(0.95)*stderror),
         lowbound95 = Estimate - (qnorm(0.975)*stderror),
         highbound95 = Estimate + (qnorm(0.975)*stderror),
         outcome = "Toxicity of out-party comments",
         hypothesis = "H3")
figure_3 <- rbind(figure_3,add)
add <- as.data.frame(coef(summary(mod.h4a)))[2,] %>% # H4a
  mutate(var = row.names(as.data.frame(coef(summary(mod.h4a)))[2,]),
         stderror = `Std. Error`,
         lowbound = Estimate - (qnorm(0.95)*stderror),
         highbound = Estimate + (qnorm(0.95)*stderror),
         lowbound95 = Estimate - (qnorm(0.975)*stderror),
         highbound95 = Estimate + (qnorm(0.975)*stderror),
         outcome = "In-party thermometer rating",
         hypothesis = "H4")
figure_3 <- rbind(figure_3,add)
add <- as.data.frame(coef(summary(mod.h4b)))[2,] %>% # H4b
  mutate(var = row.names(as.data.frame(coef(summary(mod.h4b)))[2,]),
         stderror = `Std. Error`,
         lowbound = Estimate - (qnorm(0.95)*stderror),
         highbound = Estimate + (qnorm(0.95)*stderror),
         lowbound95 = Estimate - (qnorm(0.975)*stderror),
         highbound95 = Estimate + (qnorm(0.975)*stderror),
         outcome = "Out-party thermometer rating",
         hypothesis = "H4")
figure_3 <- rbind(figure_3,add)
rm(add)
row.names(figure_3) <- NULL
figure_3 <- figure_3 %>%
  select(var, Estimate, stderror, lowbound, highbound, lowbound95, highbound95, outcome, hypothesis) %>%
  mutate(Group = factor(c(rep("Newsfeed experience", 7), rep("Party attitudes", 2))),
         significant = case_when(lowbound <= 0 & highbound >= 0 ~ "No", TRUE ~ "Yes"))
figure_3$outcome <- factor(figure_3$outcome, 
                           levels = c("Toxicity of out-party comments", "Toxicity of comments", "Toxicity of posts",
                                      "Number of out-party comments", "Number of comments", "Number of posts",
                                      "Comfort sharing opinions",
                                      "Out-party thermometer rating", 
                                      "In-party thermometer rating"))

cat("\n==============================\n")
cat("Figure 3: Average treatment effect (ATE) of the uncivil newsfeed on newsfeed experience and party attitudes\n")
cat("==============================\n")
print(figure_3)
ggdraw(figure_3 %>% # plot Figure 3
         ggplot(aes(x = Estimate, color = significant)) +
         geom_hline(yintercept = 0, lty = 2) +
         geom_pointrange(aes(x = outcome, y = Estimate, ymin = lowbound, ymax = highbound),
                         shape = 20, position = position_dodge(0.5), size = 1.2, linewidth = 0.9) +
         facet_grid(Group~., switch = "y", scales = "free_y", space = "free_y") +
         scale_color_manual(values = c("grey60","black")) +
         scale_y_continuous(limits = c(-0.6,0.6), breaks = seq(-0.5,0.5,0.25)) +
         coord_flip() +
         labs(y = "ATE of uncivil newsfeed", x = "") +
         theme_bw() +
         theme(
           axis.text = element_text(size = 12, color = "black"),
           axis.text.y = element_text(hjust = 1),
           axis.ticks = element_blank(),
           axis.title.x = element_text(size = 12, margin = margin(t = 10), color = "black"),
           axis.title.y = element_blank(),
           legend.position = "none",
           panel.grid.major = element_blank(),
           panel.grid.minor = element_blank(),
           plot.background = element_rect(color = "white"),
           plot.margin = margin(0.25, 0.5, 0.25, 0.5, "in"),
           strip.background = element_blank(),
           strip.clip = "off",
           strip.placement = "outside",  # Place facet labels outside the plot area
           strip.text = element_blank()
         )) +
  draw_label("Party attitudes", size = 12.5, hjust = 1.41, vjust = 12, fontface = "bold") +
  draw_label("Newsfeed experience", size = 12.5, hjust = 1.28, vjust = -27.25, fontface = "bold")
ggsave(height = 7.25, width = 6.75, "Figures/Figure3_coef_plot_h1_h4.png")

figure_6 <- as.data.frame(coef(summary(mod.h5)))[2,] %>% # H5
  mutate(var = row.names(as.data.frame(coef(summary(mod.h5)))[2,]),
         stderror = `Std. Error`,
         lowbound = Estimate - (qnorm(0.95)*stderror),
         highbound = Estimate + (qnorm(0.95)*stderror),
         lowbound95 = Estimate - (qnorm(0.975)*stderror),
         highbound95 = Estimate + (qnorm(0.975)*stderror),
         outcome = "Perceived polarization",
         hypothesis = "H5")
add <- as.data.frame(coef(summary(mod.h6)))[2,] %>% # H6
  mutate(var = row.names(as.data.frame(coef(summary(mod.h6)))[2,]),
         stderror = `Std. Error`,
         lowbound = Estimate - (qnorm(0.95)*stderror),
         highbound = Estimate + (qnorm(0.95)*stderror),
         lowbound95 = Estimate - (qnorm(0.975)*stderror),
         highbound95 = Estimate + (qnorm(0.975)*stderror),
         outcome = "Trust in government",
         hypothesis = "H6")
figure_6 <- rbind(figure_6,add)  
add <- as.data.frame(coef(summary(mod.h7)))[2,] %>% # H7
  mutate(var = row.names(as.data.frame(coef(summary(mod.h7)))[2,]),
         stderror = `Std. Error`,
         lowbound = Estimate - (qnorm(0.95)*stderror),
         highbound = Estimate + (qnorm(0.95)*stderror),
         lowbound95 = Estimate - (qnorm(0.975)*stderror),
         highbound95 = Estimate + (qnorm(0.975)*stderror),
         outcome = "Democratic satisfaction",
         hypothesis = "H7")
figure_6 <- rbind(figure_6,add)
rm(add)
row.names(figure_6) <- NULL
figure_6 <- figure_6 %>%
  select(var, Estimate, stderror, lowbound, highbound, lowbound95, highbound95, outcome, hypothesis) %>%
  mutate(Group = factor(c(rep("System attitudes", 3))),
         significant = case_when(lowbound <= 0 & highbound >= 0 ~ "No", TRUE ~ "Yes"))

figure_6$outcome <- factor(figure_6$outcome, levels = c("Democratic satisfaction",
                                                        "Trust in government",
                                                        "Perceived polarization"))

cat("\n==============================\n")
cat("Figure 6: Average treatment effects (ATE) of the uncivil newsfeed on system attitudes\n")
cat("==============================\n")
print(figure_6)
ggplot(figure_6, aes(x = Estimate, color = significant)) + # plot Figure 6
  geom_hline(yintercept = 0, lty = 2) +
  geom_pointrange(aes(x = outcome, y = Estimate, ymin = lowbound, ymax = highbound),
                  shape = 20, position = position_dodge(0.5), size = 1.2, linewidth = 0.9) +
  facet_grid(Group~., switch = "y", scales = "free_y", space = "free_y") +
  scale_color_manual(values = c("grey60","black")) +
  scale_y_continuous(limits = c(-0.2, 0.2), breaks = seq(-0.2, 0.2, 0.1)) +
  coord_flip() +
  labs(y = "ATE of uncivil newsfeed", x = "") +
  theme_bw() +
  theme(
    axis.text = element_text(size = 12, color = "black"),
    axis.ticks = element_blank(),
    axis.title.x = element_text(size = 12, margin = margin(t = 10)),
    axis.title.y = element_blank(),
    legend.position = "none",
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    plot.background = element_rect(color = "white"),
    plot.margin = margin(0.25, 0.25, 0.25, 0.25, "in"),
    strip.background = element_blank(),
    strip.placement = "outside",  # Place facet labels outside the plot area
    strip.text = element_text(size = 12, margin = margin(r = 10), angle = 90)
  )
ggsave(height = 4, width = 6.75, "Figures/Figure6_coef_plot_h5_h7.png")

# Confidence intervals of estimates & standard deviations for substantive interpretation ----
cat("\n==============================\n")
cat("Table S7: Confidence intervals of estimates & standard deviations for substantive interpretation\n")
cat("==============================\n")
round(confint(mod.h1),3)
round(confint(mod.h2a),3)
round(confint(mod.h2b),3)
round(confint(mod.h2c),3)
round(confint(mod.h3a),3)
round(confint(mod.h3b),3)
round(confint(mod.h3c),3)
round(confint(mod.h4a),3)
round(confint(mod.h4b),3)
round(confint(mod.h5),3)
round(confint(mod.h6),3)
round(confint(mod.h7),3)

round(sd(mydata$comfort.sharing, na.rm = T), 1) # 1.3
round(sd(mydata$tox.posts, na.rm = T), 1) # 0.1
round(sd(mydata$tox.comments, na.rm = T), 1) # 0.1
round(sd(mydata$post.therm.outparty, na.rm = T), 1) # 21.1

# Multiple hypothesis testing adjustments ----
cat("\n==============================\n")
cat("Multiple hypothesis testing adjustments\n")
cat("==============================\n")

raw_pvals <- c(
  mod.h1.p["condition.uncivil"], # comfort sharing
  mod.h2a.p["condition.uncivil"], # count of posts
  mod.h2b.p["condition.uncivil"], # count of comments
  mod.h2c.p["condition.uncivil"], # count of out-party comments
  mod.h3a.p["condition.uncivil"], # toxicity of posts
  mod.h3b.p["condition.uncivil"], # toxicity of comments
  mod.h3c.p["condition.uncivil"], # toxicity of out-party comments
  mod.h4a.p["condition.uncivil"], # thermometer: in-party
  mod.h4b.p["condition.uncivil"], # thermometer: out-party
  mod.h5.p["condition.uncivil"], # perceived polarization
  mod.h6.p["condition.uncivil"], # trust in government
  mod.h7.p["condition.uncivil"] # democratic satisfaction
)

p.adjust(raw_pvals, method = "holm")
p.adjust(raw_pvals, method = "hochberg")
p.adjust(raw_pvals, method = "hommel")
p.adjust(raw_pvals, method = "BY")
p.adjust(raw_pvals, method = "fdr")
p.adjust(raw_pvals, method = "bonferroni")




# NLP measures of posts and comments, models (Tables S12-13) ----
mod.posts.politeness <- lm(-1*scale(politeness.posts) ~ condition.uncivil, data = mydata) # model for politeness in posts (reverse-coded)
mod.posts.vader <- lm(-1*scale(vader.posts) ~ condition.uncivil, data = mydata) # model for vader in posts (reverse-coded)
mod.posts.identity.attacks <- lm(scale(identity.attack.posts) ~ condition.uncivil, data = mydata) # model for identity attacks in posts
mod.posts.insult <- lm(scale(insult.posts) ~ condition.uncivil, data = mydata) # model for insults in posts
mod.posts.profanity <- lm(scale(profanity.posts) ~ condition.uncivil, data = mydata) # model for profanity in posts

mods <- list(mod.posts.politeness, mod.posts.vader, mod.posts.identity.attacks, mod.posts.insult, mod.posts.profanity)

cat("\n==============================\n")
cat("Table S12: ATE on assignment to uncivil newsfeed on conversational features of posts\n")
cat("==============================\n")
stargazer(mods, # make latex table for S12
          title = "ATE of assignment to uncivil newsfeed on conversational features of posts",
          dep.var.labels.include = T,
          dep.var.labels = c("Politeness","VADER","Identity attack","Insult","Profanity"),
          covariate.labels = c("Uncivil newsfeed", "(Constant)"),
          keep.stat = c("n", "adj.rsq"),
          star.char    = c("\\dagger", "*", "**"),
          #report = ('vc*p'),
          digits = 3,
          align = T,
          no.space = T,
          out = "Tables/TableS12_ate_post_convo_features.tex")

mod.comments.politeness <- lm(-1*scale(politeness.comments) ~ condition.uncivil, data = mydata) # model for politeness in comments (reverse-coded)
round(confint(mod.comments.politeness),3)

mod.comments.vader <- lm(-1*scale(vader.comments) ~ condition.uncivil, data = mydata) # model for vader in comments (reverse-coded)
round(confint(mod.comments.vader),3)

mod.comments.identity.attacks <- lm(scale(identity.attack.comments) ~ condition.uncivil, data = mydata) # model for identity attacks in comments
round(confint(mod.comments.identity.attacks),3)

mod.comments.insult <- lm(scale(insult.comments) ~ condition.uncivil, data = mydata) # model for insults in comments
round(confint(mod.comments.insult),3)

mod.comments.profanity <- lm(scale(profanity.comments) ~ condition.uncivil, data = mydata) # model for profanity in comments
round(confint(mod.comments.profanity),3)

mods <- list(mod.comments.politeness, mod.comments.vader, mod.comments.identity.attacks, mod.comments.insult, mod.comments.profanity)

cat("\n==============================\n")
cat("Table S13: ATE on assignment to uncivil newsfeed on conversational features of comments\n")
cat("==============================\n")
stargazer(mods, # make latex table for S13
          title = "ATE on assignment to uncivil newsfeed on conversational features of comments",
          dep.var.labels.include = T,
          dep.var.labels = c("Politeness","VADER","Identity attack","Insult","Profanity"),
          covariate.labels = c("Uncivil newsfeed", "(Constant)"),
          keep.stat = c("n", "adj.rsq"),
          star.char    = c("\\dagger", "*", "**"),
          digits = 3,
          align = T,
          no.space = T,
          out = "Tables/TableS13_ate_comment_convo_features.tex")


# NLP measures of posts and comments, coefficient plot (Figure 4) ----
figure_4 <- as.data.frame(coef(summary(mod.comments.politeness)))[2,] %>% # H1
  mutate(var = row.names(as.data.frame(coef(summary(mod.comments.politeness)))[2,]),
         stderror = `Std. Error`,
         lowbound = Estimate - (qnorm(0.975)*stderror),
         highbound = Estimate + (qnorm(0.975)*stderror),
         outcome = "Politeness\n(reversed)",
         content.type = "Comments")
add <- as.data.frame(coef(summary(mod.comments.vader)))[2,] %>% # H2a
  mutate(var = row.names(as.data.frame(coef(summary(mod.comments.vader)))[2,]),
         stderror = `Std. Error`,
         lowbound = Estimate - (qnorm(0.975)*stderror),
         highbound = Estimate + (qnorm(0.975)*stderror),
         outcome = "VADER sentiment\n(reversed)",
         content.type = "Comments")
figure_4 <- rbind(figure_4,add)
add <- as.data.frame(coef(summary(mod.comments.identity.attacks)))[2,] %>% # H2b
  mutate(var = row.names(as.data.frame(coef(summary(mod.comments.identity.attacks)))[2,]),
         stderror = `Std. Error`,
         lowbound = Estimate - (qnorm(0.975)*stderror),
         highbound = Estimate + (qnorm(0.975)*stderror),
         outcome = "Identity attacks",
         content.type = "Comments")
figure_4 <- rbind(figure_4,add)
add <- as.data.frame(coef(summary(mod.comments.insult)))[2,] %>% # H2b
  mutate(var = row.names(as.data.frame(coef(summary(mod.comments.insult)))[2,]),
         stderror = `Std. Error`,
         lowbound = Estimate - (qnorm(0.975)*stderror),
         highbound = Estimate + (qnorm(0.975)*stderror),
         outcome = "Insults",
         content.type = "Comments")
figure_4 <- rbind(figure_4, add)
add <- as.data.frame(coef(summary(mod.comments.profanity)))[2,] %>% # H3a
  mutate(var = row.names(as.data.frame(coef(summary(mod.comments.profanity)))[2,]),
         stderror = `Std. Error`,
         lowbound = Estimate - (qnorm(0.975)*stderror),
         highbound = Estimate + (qnorm(0.975)*stderror),
         outcome = "Profanity",
         content.type = "Comments")
figure_4 <- rbind(figure_4,add)
add <- as.data.frame(coef(summary(mod.posts.politeness)))[2,] %>% # H1
  mutate(var = row.names(as.data.frame(coef(summary(mod.posts.politeness)))[2,]),
         stderror = `Std. Error`,
         lowbound = Estimate - (qnorm(0.975)*stderror),
         highbound = Estimate + (qnorm(0.975)*stderror),
         outcome = "Politeness\n(reversed)",
         content.type = "Posts")
figure_4 <- rbind(figure_4,add)
add <- as.data.frame(coef(summary(mod.posts.vader)))[2,] %>% # H2a
  mutate(var = row.names(as.data.frame(coef(summary(mod.posts.vader)))[2,]),
         stderror = `Std. Error`,
         lowbound = Estimate - (qnorm(0.975)*stderror),
         highbound = Estimate + (qnorm(0.975)*stderror),
         outcome = "VADER sentiment\n(reversed)",
         content.type = "Posts")
figure_4 <- rbind(figure_4,add)
add <- as.data.frame(coef(summary(mod.posts.identity.attacks)))[2,] %>% # H2b
  mutate(var = row.names(as.data.frame(coef(summary(mod.posts.identity.attacks)))[2,]),
         stderror = `Std. Error`,
         lowbound = Estimate - (qnorm(0.975)*stderror),
         highbound = Estimate + (qnorm(0.975)*stderror),
         outcome = "Identity attacks",
         content.type = "Posts")
figure_4 <- rbind(figure_4,add)
add <- as.data.frame(coef(summary(mod.posts.insult)))[2,] %>% # H2b
  mutate(var = row.names(as.data.frame(coef(summary(mod.posts.insult)))[2,]),
         stderror = `Std. Error`,
         lowbound = Estimate - (qnorm(0.975)*stderror),
         highbound = Estimate + (qnorm(0.975)*stderror),
         outcome = "Insults",
         content.type = "Posts")
figure_4 <- rbind(figure_4, add)
add <- as.data.frame(coef(summary(mod.posts.profanity)))[2,] %>% # H3a
  mutate(var = row.names(as.data.frame(coef(summary(mod.posts.profanity)))[2,]),
         stderror = `Std. Error`,
         lowbound = Estimate - (qnorm(0.975)*stderror),
         highbound = Estimate + (qnorm(0.975)*stderror),
         outcome = "Profanity",
         content.type = "Posts")
figure_4 <- rbind(figure_4,add)
rm(add)
row.names(figure_4) <- NULL
figure_4 <- figure_4 %>%
  select(var, Estimate, stderror, lowbound, highbound, outcome, content.type) %>%
  mutate(significant = case_when(lowbound <= 0 & highbound >= 0 ~ "No", TRUE ~ "Yes"),
         outcome = factor(outcome, 
                          levels = c("Profanity", "Insults", "Identity attacks",
                                     "VADER sentiment\n(reversed)", "Politeness\n(reversed)")),
         content.type = factor(content.type, levels = c("Comments","Posts")))


cat("\n==============================\n")
cat("Figure 4: Average treatment effect (ATE) of the uncivil newsfeed on conversational features of users' posts and comments\n")
cat("==============================\n")
print(figure_4)
figure_4 %>% # make Figure 4
  ggplot(aes(x = Estimate, color = significant, group = content.type, shape = content.type)) +
  geom_hline(yintercept = 0, lty = 2) +
  geom_pointrange(aes(x = outcome, y = Estimate, ymin = lowbound, ymax = highbound),
                  position = position_dodge(0.5), size = 1, linewidth = 0.9) +
  scale_color_manual(values = c("grey60","black"), guide = "none") +
  scale_shape_discrete("Content type", breaks = c("Posts","Comments")) +
  scale_y_continuous(limits = c(-0.6,0.6), breaks = seq(-0.5,0.5,0.25)) +
  theme_bw() +
  coord_flip() +
  labs(y = "ATE of uncivil newsfeed", x = "") +
  theme(
    axis.text = element_text(size = 12, color = "black"),
    axis.ticks = element_blank(),
    axis.title.x = element_text(size = 12, margin = margin(t = 10)),
    axis.title.y = element_blank(),
    legend.position = "bottom",
    legend.key.size = unit(0.3, "cm"), 
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    plot.background = element_rect(color = "white"),
    plot.margin = margin(0.1, 0.75, 0, 0, "in"),
    strip.background = element_blank(),
    strip.placement = "outside",  # Place facet labels outside the plot area
    strip.text = element_text(size = 16)
  )
ggsave(height = 5, width = 6.75, "Figures/Figure4_coef_plot_nlp.png")

# Commenting and toxicity by post type, political vs. nonpolitical (Figure 5) ----
cat("\n==============================\n")
cat("Figure 5: Commenting and comment toxicity on in-party, out-party, and non-political posts by condition\n")
cat("==============================\n")

toxicity.by.post.type <- all.comments %>%
  left_join(ids, by = "participant_code") %>%
  left_join(pids, by = "participant_code") %>%
  mutate(post.type = case_when(post.type %in% c("Bonus","Filler") ~ "Non-political",
                               post.party == "Republican" & pid7 %in% c(5:7) ~ "In-party",
                               post.party == "Democratic" & pid7 %in% c(1:3) ~ "In-party",
                               post.party == "Republican" & pid7 %in% c(1:3) ~ "Out-party",
                               post.party == "Democratic" & pid7 %in% c(5:7) ~ "Out-party"),
         post.type = factor(post.type, levels = c("In-party","Out-party","Non-political"))) %>%
  drop_na(post.type) %>%
  group_by(condition, post.type) %>%
  summarize(mean_se = mean_se(tox, mult = qnorm(0.975)), .groups = 'drop') %>%
  mutate(outcome = "(b) Comment toxicity")

commenting.by.post.type <- mydata %>%
  select(condition, commented.inparty, commented.outparty, commented.nonpolitical) %>%
  pivot_longer(cols = commented.inparty:commented.nonpolitical, names_to = "post.type", values_to = "value") %>%
  group_by(condition, post.type) %>%
  summarize(mean_se = mean_se(value, mult = qnorm(0.975)), .groups = 'drop') %>%
  mutate(condition = factor(condition, levels = c("civil","uncivil")),
         post.type = case_when(post.type == "commented.inparty" ~ "In-party",
                               post.type == "commented.outparty" ~ "Out-party",
                               post.type == "commented.nonpolitical" ~ "Non-political"),
         post.type = factor(post.type, levels = c("In-party","Out-party","Non-political")),
         outcome = "(a) Comment at all")

(behavior.by.post.type <- rbind(toxicity.by.post.type, commenting.by.post.type))


ggplot(behavior.by.post.type, aes(x = post.type, y = mean_se$y, group = condition, fill = condition)) + # plot figure 5
  facet_wrap(~outcome, ncol = 2, nrow = 1, scales = "free_y") +
  geom_bar(stat = "identity", position = "dodge", width = 0.5) +
  geom_errorbar(aes(ymin = mean_se$ymin, ymax = mean_se$ymax), 
                position = position_dodge(width = 0.5), width = 0.1) +
  labs(x = "Post type",y = "Value") +
  scale_y_continuous(labels = scales::label_number(accuracy=0.01),
                     breaks = scales::breaks_extended(n = 5.5)) +
  scale_fill_manual("",
                    values = brewer.pal(3, "PuOr")[c(1, 3)],
                    breaks = c("civil","uncivil"), 
                    labels = c("Civil","Uncivil")) +
  theme_bw() +
  theme(plot.margin = margin(0.25, 0.25, 0, 0.25, "in"),
        axis.text = element_text(size = 12, color = "black"),
        axis.title = element_text(size = 12),
        axis.title.x = element_text(margin = margin(t=10)),
        axis.title.y = element_text(margin = margin(r=10)),
        legend.title = element_text(size = 12),
        legend.text = element_text(size = 12),
        panel.grid = element_blank(),
        legend.position = "bottom",
        strip.background = element_rect(fill = "white", color = "white"),
        strip.text = element_text(size = 14, color = "black", vjust = 2),
        panel.grid.minor = element_blank())
ggsave(height = 5, width = 10, "Figures/Figure5_comment_behavior_by_post_type.png") 


################################ ----
## SUPPLEMENTAL TABLES & FIGURES ----
################################ ----
# Descriptive statistics: survey measures (Table S2-3) ----
desc.stats.categorical <- mydata %>% # frame of categorical variables
  select(device, educ4, gender3, pid4, race6)
  
cat("\n==============================\n")
cat("Table S2: Summary statistics for categorical variables\n")
cat("==============================\n")
st(desc.stats.categorical,
   out = "latex",
   digits = 3,
   numformat = formatfunc(digits = 1, nsmall = 1, big.mark = ','),
   factor.counts = TRUE,
   file = "Tables/TableS2_desc_categorical.tex")

desc.stats.continuous <- mydata %>% # frame of continuous variables
  select(age, 
         comfort.sharing,
         pre.climplc.dem, pre.climplc.rep,
         post.climplc.dem, post.climplc.rep,
         pre.therm.inparty, post.therm.inparty,
         pre.therm.outparty, post.therm.outparty,
         pre.trust, post.trust,
         pre.democ2, post.democ2)
cat("\n==============================\n")
cat("Table S3: Summary statistics for continuous variables\n")
cat("==============================\n")
st(desc.stats.continuous, out = "latex", numformat = formatfunc(digits = 1, nsmall = 1, big.mark = ','),  # make latex table for S3
   summ = c("notNA(x)","mean(x)", "min(x)","max(x)","sd(x)"),
   file = "Tables/TableS3_desc_continuous.tex")

# Descriptive statistics: comparing to 2024 ANES (Table S4) ----
anes <- anes %>% # rename variables
  rename(therm.dem = V241166,
         therm.rep = V241167,
         pid7 = V241227x,
         educ = V241463,
         age = V241458x,
         use.fb = V242578,
         use.twitter = V242580,
         gender = V241551,
         race = V241501x,
         pol.int = V241004,
         post.pol.fb = V242579,
         post.pol.twitter = V242581,
         visit.fb = V242577a, #
         visit.twitter = V242577b, #
         visit.instagram = V242577c, #
         visit.reddit = V242577d,#
         visit.yt = V242577e, #
         visit.snap = V242577f, #
         visit.tiktok = V242577g,#
         weight.pre.full = V240107a,
         weight.post.full = V240107b,
         psu.full = V240107c,
         strata.full = V240107d)

anes <- anes %>%
  mutate(across(c(therm.rep, therm.dem, pid7, age, # replace values less than 0 with NA
                  starts_with("visit"), starts_with("use")),
                ~ case_when(. < 0 ~ NA_real_,
                            TRUE ~ .))) %>%
  mutate(gender = case_when(gender == 1 ~ "Man", # make gender categorical variable
                            gender == 2 ~ "Woman",
                            gender %in% c(3,4) ~ "Something else"),
         race = case_when(race == 1 ~ "White", # make race categorical variable
                          race == 2 ~ "Black",
                          race == 3 ~ "Hispanic",
                          race == 4 ~ "Asian",
                          race %in% c(5,6) ~ "Mixed/Other"),
         therm.inparty = case_when(pid7 < 4 ~ therm.dem, # assign in-party therms
                                   pid7 > 4 ~ therm.rep),
         therm.outparty = case_when(pid7 < 4 ~ therm.rep, # assign out-party therms
                                    pid7 > 4 ~ therm.dem),
         post.pol.fb = case_when(post.pol.fb %in% c(1:5) ~ post.pol.fb), # remove ineligible responses
         post.pol.twitter = case_when(post.pol.twitter %in% c(1:5) ~ post.pol.twitter), # remove ineligible responses
         post.pol.sm = case_when(post.pol.fb < 5 | post.pol.twitter < 5 ~ 1, # 1 if sometimes post political content on twitter or facebook
                                 post.pol.fb == 5 & post.pol.twitter == 5 ~ 0), # 0 if never post political content on twitter and facebook
         pol.int = case_when(pol.int %in% c(1:5) ~ 6 - pol.int), # reverse code political interest so higher values = more interest
         educ.college = case_when(educ %in% c(1:10) ~ 0, # 0 if less than college degree
                                  educ %in% c(11:16) ~ 1), # 1 if college degrees (>= associates)
         sm.count = visit.fb + visit.twitter + visit.instagram + visit.reddit + visit.yt + visit.snap + visit.tiktok, # count sm platforms visited in last year
         soc.med.user = case_when(sm.count >= 4 ~ 1, # 1 if visited 4+ sm platforms
                                  sm.count < 4 ~ 0), # 0 if visited <4 sm platforms
         pid3 = case_when(pid7 < 4 ~ "Democratic", # make pid3 categorical variable
                          pid7 == 4 ~ "Pure Independent",
                          pid7 > 4 ~ "Republican"))

anes_des <- anes %>%
  drop_na(weight.post.full, psu.full, strata.full) %>%
  as_survey_design(weights = weight.post.full,
                   ids = psu.full, 
                   strata = strata.full, 
                   nest = T)

civility <- mydata %>%
  summarize(mean.age = mean(age, na.rm = T),
            pct.man = mean(gender3 == "Man", na.rm = TRUE) * 100,
            pct.woman = mean(gender3 == "Woman", na.rm = TRUE) * 100,
            pct.other = mean(gender3 == "Something else", na.rm = TRUE) * 100,
            pct.dem = mean(pid3 == "Democratic", na.rm = T) * 100,
            pct.rep = mean(pid3 == "Republican", na.rm = T) * 100,
            pct.ind = mean(pid3 == "Pure Independent", na.rm = T) * 100,
            pct.asian = mean(race6 == "Asian", na.rm = TRUE) * 100,
            pct.black = mean(race6 == "Black", na.rm = TRUE) * 100,
            pct.hispanic = mean(race6 == "Hispanic", na.rm = TRUE) * 100,
            pct.mixed.other = mean(race6 == "Mixed/Other", na.rm = TRUE) * 100,
            pct.white = mean(race6 == "White", na.rm = TRUE) * 100,
            mean.college = mean(educ.college, na.rm = T)*100,
            mean.sm.use = mean(sm.count, na.rm = T),
            mean.pol.int = mean(pol.int, na.rm = T),
            mean.inparty = mean(pre.therm.inparty, na.rm = T),
            mean.outparty = mean(pre.therm.outparty, na.rm = T)) %>%
  pivot_longer(everything(), names_to = "variable", values_to = "value") %>%
  mutate(sample = "civility")

anes_full <- anes_des %>%
  summarize(mean.age = survey_mean(age, na.rm = T),
            pct.man = survey_mean(gender == "Man", na.rm = TRUE) * 100,
            pct.woman = survey_mean(gender == "Woman", na.rm = TRUE) * 100,
            pct.other = survey_mean(gender == "Something else", na.rm = TRUE) * 100,
            pct.dem = survey_mean(pid3 == "Democratic", na.rm = T) * 100,
            pct.rep = survey_mean(pid3 == "Republican", na.rm = T) * 100,
            pct.ind = survey_mean(pid3 == "Pure Independent", na.rm = T) * 100,
            pct.asian = survey_mean(race == "Asian", na.rm = TRUE) * 100,
            pct.black = survey_mean(race == "Black", na.rm = TRUE) * 100,
            pct.hispanic = survey_mean(race == "Hispanic", na.rm = TRUE) * 100,
            pct.mixed.other = survey_mean(race == "Mixed/Other", na.rm = TRUE) * 100,
            pct.white = survey_mean(race == "White", na.rm = TRUE) * 100,
            mean.college = survey_mean(educ.college, na.rm = T)*100,
            mean.sm.use = survey_mean(sm.count, na.rm = T),
            mean.pol.int = survey_mean(pol.int, na.rm = T),
            mean.inparty = survey_mean(therm.inparty, na.rm = T),
            mean.outparty = survey_mean(therm.outparty, na.rm = T)) %>%
  select(-ends_with("_se")) %>%
  pivot_longer(everything(), names_to = "variable", values_to = "value") %>%
  mutate(sample = "anes_full")

anes_sm_user <- anes_des %>%
  filter(soc.med.user == 1) %>%
  summarize(mean.age = survey_mean(age, na.rm = T),
            pct.man = survey_mean(gender == "Man", na.rm = TRUE) * 100,
            pct.woman = survey_mean(gender == "Woman", na.rm = TRUE) * 100,
            pct.other = survey_mean(gender == "Something else", na.rm = TRUE) * 100,
            pct.dem = survey_mean(pid3 == "Democratic", na.rm = T) * 100,
            pct.rep = survey_mean(pid3 == "Republican", na.rm = T) * 100,
            pct.ind = survey_mean(pid3 == "Pure Independent", na.rm = T) * 100,
            pct.asian = survey_mean(race == "Asian", na.rm = TRUE) * 100,
            pct.black = survey_mean(race == "Black", na.rm = TRUE) * 100,
            pct.hispanic = survey_mean(race == "Hispanic", na.rm = TRUE) * 100,
            pct.mixed.other = survey_mean(race == "Mixed/Other", na.rm = TRUE) * 100,
            pct.white = survey_mean(race == "White", na.rm = TRUE) * 100,
            mean.college = survey_mean(educ.college, na.rm = T)*100,
            mean.sm.use = survey_mean(sm.count, na.rm = T),
            mean.pol.int = survey_mean(pol.int, na.rm = T),
            mean.inparty = survey_mean(therm.inparty, na.rm = T),
            mean.outparty = survey_mean(therm.outparty, na.rm = T)) %>%
  select(-ends_with("_se")) %>%
  pivot_longer(everything(), names_to = "variable", values_to = "value") %>%
  mutate(sample = "anes_sm_user")

anes_post <- anes_des %>%
  filter(post.pol.sm == 1) %>%
  summarize(mean.age = survey_mean(age, na.rm = T),
            pct.man = survey_mean(gender == "Man", na.rm = TRUE) * 100,
            pct.woman = survey_mean(gender == "Woman", na.rm = TRUE) * 100,
            pct.other = survey_mean(gender == "Something else", na.rm = TRUE) * 100,
            pct.dem = survey_mean(pid3 == "Democratic", na.rm = T) * 100,
            pct.rep = survey_mean(pid3 == "Republican", na.rm = T) * 100,
            pct.ind = survey_mean(pid3 == "Pure Independent", na.rm = T) * 100,
            pct.asian = survey_mean(race == "Asian", na.rm = TRUE) * 100,
            pct.black = survey_mean(race == "Black", na.rm = TRUE) * 100,
            pct.hispanic = survey_mean(race == "Hispanic", na.rm = TRUE) * 100,
            pct.mixed.other = survey_mean(race == "Mixed/Other", na.rm = TRUE) * 100,
            pct.white = survey_mean(race == "White", na.rm = TRUE) * 100,
            mean.college = survey_mean(educ.college, na.rm = T)*100,
            mean.sm.use = survey_mean(sm.count, na.rm = T),
            mean.pol.int = survey_mean(pol.int, na.rm = T),
            mean.inparty = survey_mean(therm.inparty, na.rm = T),
            mean.outparty = survey_mean(therm.outparty, na.rm = T)) %>%
  select(-ends_with("_se")) %>%
  pivot_longer(everything(), names_to = "variable", values_to = "value") %>%
  mutate(sample = "anes_post")


final_table <- bind_rows(civility, anes_full, anes_sm_user, anes_post) %>% 
    pivot_wider(names_from = sample, values_from = value) %>%
    mutate(across(
      where(is.numeric),
      ~ format(round(.x, 1), nsmall = 1)
    )) %>%
    mutate(across(
      -variable,
      ~ ifelse(grepl("^pct", variable),
               paste0(format(round(as.numeric(.x), 1), nsmall = 1)),
               format(round(as.numeric(.x), 1), nsmall = 1)
      )
    ))

cat("\n==============================\n")
cat("Table S4: Comparing sample to general population and social media users\n")
cat("==============================\n")
(final_table <- bind_rows(civility, anes_full, anes_sm_user, anes_post) %>% 
    pivot_wider(names_from = sample, values_from = value) %>%
    mutate(across(
      where(is.numeric),
      ~ format(round(.x, 1), nsmall = 1)
    )) %>%
    mutate(across(
      -variable,
      ~ ifelse(grepl("^pct", variable),
               paste0(format(round(as.numeric(.x), 1), nsmall = 1)),
               format(round(as.numeric(.x), 1), nsmall = 1)
      )
    )))

table_S4 <- final_table %>% # print table in latex
  kable(
    format = "latex", 
    booktabs = TRUE,
    digits = 1,
    caption = "Summary statistics for continuous variables",
    col.names = c("Variable", "Our", "ANES", "ANES", "ANES")
  )
writeLines(table_S4, "Tables/TableS4_desc_comparisons.tex")

# Descriptive statistics: platform data (Table S5) ----
all.posts.cond <- all.posts %>% left_join(ids, by = "participant_code") %>% drop_na(condition) # join condition to posts
all.comments.cond <- all.comments %>% left_join(ids, by = "participant_code") %>% drop_na(condition) # join condition to comments
ds.ps <- data.frame(
  stat = c("Number of posts", 
           "Number of unique posters", 
           "Mean characters per post",
           "Mean tokens per post",
           "Mean toxicity per post",
           "Proportion of respondents making 1+ post",
           "Mean total characters per respondent",
           "Mean total tokens per respondent",
           "Mean number of posts",
           "Number of comments", 
           "Number of unique commenters", 
           "Mean characters per comment",
           "Mean tokens per comment",
           "Mean toxicity per comment",
           "Proportion of respondents making 1+ comments",
           "Mean total characters per respondent",
           "Mean total tokens per respondent",
           "Mean number of comments"),
  all = c(all.posts.cond %>% summarize(val = n()) %>% pull(val) %>% as.numeric(), # number of posts
          all.posts.cond %>% summarize(val = n_distinct(participant_code)) %>% pull(val) %>% as.numeric(), # number of unique posters
          mean(all.posts.cond$chars.posts, na.rm = T), # mean characters per post
          mean(all.posts.cond$tokens.posts, na.rm = T), # mean tokens per post
          mean(all.posts.cond$tox, na.rm = T), # mean toxicity per post
          mean(mydata$made.post, na.rm = T), # prop making a 1+ posts
          mean(mydata$sum.chars.posts, na.rm = T), # mean total characters in posts per respondent
          mean(mydata$sum.tokens.posts, na.rm = T), # mean total tokens in posts per respondent
          mean(mydata$n.posts, na.rm = T), # mean number of posts
          all.comments.cond %>% summarize(val = n()) %>% pull(val) %>% as.numeric(), # number of comments
          all.comments.cond %>% summarize(val = n_distinct(participant_code)) %>% pull(val) %>% as.numeric(), # number of unique commenters
          mean(all.comments.cond$chars.comments, na.rm = T), # mean characters per comment
          mean(all.comments.cond$tokens.comments, na.rm = T), # mean tokens per comment
          mean(all.comments.cond$tox, na.rm = T), # mean toxicity per comment
          mean(mydata$made.comment, na.rm = T), # prop making 1+ comments
          mean(mydata$sum.chars.comments, na.rm = T), # mean total characters in comments per respondent
          mean(mydata$sum.tokens.comments, na.rm = T), # mean total tokens in comments per respondent
          mean(mydata$n.comments, na.rm = T) # mean number of comments
  ),
  
  civil = c(all.posts.cond %>% filter(condition == "civil") %>% summarize(val = n()) %>% pull(val) %>% as.numeric(), # number of posts
            all.posts.cond %>% filter(condition == "civil") %>% summarize(val = n_distinct(participant_code)) %>% pull(val) %>% as.numeric(), # number of unique posters
            as.numeric(t.test(all.posts.cond$chars.posts ~ all.posts.cond$condition)$estimate[1]), # mean characters per post
            as.numeric(t.test(all.posts.cond$tokens.posts ~ all.posts.cond$condition)$estimate[1]), # mean tokens per post
            as.numeric(t.test(all.posts.cond$tox ~ all.posts.cond$condition)$estimate[1]), # mean toxicity per post
            as.numeric(t.test(mydata$made.post ~ mydata$condition)$estimate[1]), # prop making a 1+ posts
            as.numeric(t.test(mydata$sum.chars.posts ~ mydata$condition)$estimate[1]), # mean total characters in posts per respondent
            as.numeric(t.test(mydata$sum.tokens.posts ~ mydata$condition)$estimate[1]), # mean total tokens in posts per respondent
            as.numeric(t.test(mydata$n.posts ~ mydata$condition)$estimate[1]), # mean number of posts
            all.comments.cond %>% filter(condition == "civil") %>% summarize(val = n()) %>% pull(val) %>% as.numeric(), # number of comments
            all.comments.cond %>% filter(condition == "civil") %>% summarize(val = n_distinct(participant_code)) %>% pull(val) %>% as.numeric(), # number of unique comments
            as.numeric(t.test(all.comments.cond$chars.comments ~ all.comments.cond$condition)$estimate[1]), # mean characters per comment
            as.numeric(t.test(all.comments.cond$tokens.comments ~ all.comments.cond$condition)$estimate[1]), # mean tokens per comment
            as.numeric(t.test(all.comments.cond$tox ~ all.comments.cond$condition)$estimate[1]), # mean toxicity per comment
            as.numeric(t.test(mydata$made.comment ~ mydata$condition)$estimate[1]), # prop making 1+ comments
            as.numeric(t.test(mydata$sum.chars.comments ~ mydata$condition)$estimate[1]), # mean total characters in comments per respondent
            as.numeric(t.test(mydata$sum.tokens.comments ~ mydata$condition)$estimate[1]), # mean total tokens in comments per respondent
            as.numeric(t.test(mydata$n.comments ~ mydata$condition)$estimate[1]) # mean number of comments
  ),
  
  uncivil = c(all.posts.cond %>% filter(condition == "uncivil") %>% summarize(val = n()) %>% pull(val) %>% as.numeric(), # number of posts
              all.posts.cond %>% filter(condition == "uncivil") %>% summarize(val = n_distinct(participant_code)) %>% pull(val) %>% as.numeric(), # number of unique posters
              as.numeric(t.test(all.posts.cond$chars.posts ~ all.posts.cond$condition)$estimate[2]), # mean characters per post
              as.numeric(t.test(all.posts.cond$tokens.posts ~ all.posts.cond$condition)$estimate[2]), # mean tokens per post
              as.numeric(t.test(all.posts.cond$tox ~ all.posts.cond$condition)$estimate[2]), # mean toxicity per post
              as.numeric(t.test(mydata$made.post ~ mydata$condition)$estimate[2]), # prop making 1+ posts
              as.numeric(t.test(mydata$sum.chars.posts ~ mydata$condition)$estimate[2]), # mean total characters in posts per respondent
              as.numeric(t.test(mydata$sum.tokens.posts ~ mydata$condition)$estimate[2]), # mean total tokens in posts per respondent
              as.numeric(t.test(mydata$n.posts ~ mydata$condition)$estimate[2]), # mean number of posts
              all.comments.cond %>% filter(condition == "uncivil") %>% summarize(val = n()) %>% pull(val) %>% as.numeric(), # number of comments
              all.comments.cond %>% filter(condition == "uncivil") %>% summarize(val = n_distinct(participant_code)) %>% pull(val) %>% as.numeric(), # number of unique comments
              as.numeric(t.test(all.comments.cond$chars.comments ~ all.comments.cond$condition)$estimate[2]), # mean characters per comment
              as.numeric(t.test(all.comments.cond$tokens.comments ~ all.comments.cond$condition)$estimate[2]), # mean tokens per comment
              as.numeric(t.test(all.comments.cond$tox ~ all.comments.cond$condition)$estimate[2]), # mean toxicity per comment
              as.numeric(t.test(mydata$made.comment ~ mydata$condition)$estimate[2]), # prop making 1+ comments
              as.numeric(t.test(mydata$sum.chars.comments ~ mydata$condition)$estimate[2]), # mean total characters in comments per respondent
              as.numeric(t.test(mydata$sum.tokens.comments ~ mydata$condition)$estimate[2]), # mean total tokens in comments per respondent
              as.numeric(t.test(mydata$n.comments ~ mydata$condition)$estimate[2]) # mean number of comments
  ),
  
  p = c(NA,
        NA,
        as.numeric(t.test(all.posts.cond$chars.posts ~ all.posts.cond$condition)$p.value), # p for difference in characters per post by condition
        as.numeric(t.test(all.posts.cond$tokens.posts ~ all.posts.cond$condition)$p.value), # p for difference in tokens per post by condition
        as.numeric(t.test(all.posts.cond$tox ~ all.posts.cond$condition)$p.value), # p for difference in toxicity per post by condition
        as.numeric(t.test(mydata$made.post ~ mydata$condition)$p.value), # p for difference in proportion making post by condition
        as.numeric(t.test(mydata$sum.chars.posts ~ mydata$condition)$p.value), # p for difference in total characters in posts per respondent by condition
        as.numeric(t.test(mydata$sum.tokens.posts ~ mydata$condition)$p.value), # p for difference in total tokens in posts per respondent by condition
        as.numeric(t.test(mydata$n.posts ~ mydata$condition)$p.value), # p difference in number of posts by condition
        NA,
        NA,
        as.numeric(t.test(all.comments.cond$chars.comments ~ all.comments.cond$condition)$p.value), # p for difference in characters per comment by condition
        as.numeric(t.test(all.comments.cond$tokens.comments ~ all.comments.cond$condition)$p.value), # p for difference in tokens per comment by condition 
        as.numeric(t.test(all.comments.cond$tox ~ all.comments.cond$condition)$p.value), # p for difference in toxicity per comment by condition
        as.numeric(t.test(mydata$made.comment ~ mydata$condition)$p.value), # p for difference in proportion making comment by condition
        as.numeric(t.test(mydata$sum.chars.comments ~ mydata$condition)$p.value), # p for difference in total characters in comments per respondent by condition
        as.numeric(t.test(mydata$sum.tokens.comments ~ mydata$condition)$p.value), # p for difference in total tokens in comments per respondent by condition
        as.numeric(t.test(mydata$n.comments ~ mydata$condition)$p.value) # p for difference in number of comments by condition
  )
)

ds.ps <- ds.ps %>% # make latex table for S5
  mutate(stat = paste0("\\quad ", stat),
         all = format(round(as.numeric(all), 1), nsmall = 1),
         civil = format(round(as.numeric(civil), 1), nsmall = 1),
         uncivil = format(round(as.numeric(uncivil), 1), nsmall = 1),
         p = format(round(as.numeric(p), 1), nsmall = 1)) 

cat("\n==============================\n")
cat("Table S5: Summary statistics for posts and comments\n")
cat("==============================\n")
xtable(ds.ps)
print(xtable(ds.ps), include.rownames=FALSE, 
      sanitize.text.function = identity, 
      file = "Tables/TableS5_desc_platform.tex")

# Descriptive statistics: balance table (Table S6) ----
balance.ps <- data.frame(variable = c("Education", "Gender", "Race", "3 point party identity","Age", # variable names
                                      "Pre-treatment in-party thermometer","Pre-treatment out-party thermometer",
                                      "Pre-treatment 7-point placement (Democratic)", "Pre-treatment 7-point placement (Republican)",
                                      "Pre-treatment political trust","Pre-treatment satisfaction with democracy"),
                         test = c(rep("Chi squared",4), rep("KS test", 7)), # test type
                         p = c(as.numeric(chisq.test(table(mydata$condition, mydata$educ4))$p.value), # p value
                               as.numeric(chisq.test(table(mydata$condition, mydata$female))$p.value),
                               as.numeric(chisq.test(table(mydata$condition, mydata$race6))$p.value),
                               as.numeric(chisq.test(table(mydata$condition, mydata$pid3))$p.value),
                               as.numeric(ks.test(age ~ condition, data = mydata)$p.value),
                               as.numeric(ks.test(pre.therm.inparty ~ condition, data = mydata)$p.value),
                               as.numeric(ks.test(pre.therm.outparty ~ condition, data = mydata)$p.value),
                               as.numeric(ks.test(pre.climplc.dem ~ condition, data = mydata)$p.value),
                               as.numeric(ks.test(pre.climplc.rep ~ condition, data = mydata)$p.value),
                               as.numeric(ks.test(pre.trust ~ condition, data = mydata)$p.value),
                               as.numeric(ks.test(pre.democ2 ~ condition, data = mydata)$p.value))
)
balance.ps$p <- format(round(balance.ps$p, 2), nsmall = 1) # format p-values


cat("\n==============================\n")
cat("Table S6: Balance table\n")
cat("==============================\n")
xtable(balance.ps)
print(xtable(balance.ps), include.rownames = F,
      file = "Tables/TableS6_balance.tex") # make latex table for S6

# Descriptive statistics: compare civil vs. uncivil users (Table S7) ----

tox_post_tab <- mydata %>% # table comparing toxic posters
  filter(made.post == 1 & !is.na(tox.posts)) %>%  # posters only
  mutate(q = ntile(tox.posts, 4), # make post toxicity quartiles
         tox_group = case_when(q == 1 ~ "Low", # lowest quartile post toxicity
                               q %in% c(2, 3) ~ "Medium", # middle quartiles post toxicity
                               q == 4 ~ "High"), # highest quartile post toxicity
         tox_group = factor(tox_group, levels = c("Low","Medium","High"))) %>% # reorder
  group_by(tox_group) %>% # group by post toxicity quartile
  summarize(
    # age,education, gender
    mean.age   = mean(age, na.rm = TRUE),
    pct.college = mean(educ.college, na.rm = TRUE) * 100,
    pct.female  = mean(female == "Woman", na.rm = TRUE) * 100,
    # race
    pct.asian   = mean(race6 == "Asian", na.rm = TRUE) * 100,
    pct.black   = mean(race6 == "Black", na.rm = TRUE) * 100,
    pct.hisp    = mean(race6 == "Hispanic", na.rm = TRUE) * 100,
    pct.mixed   = mean(race6 == "Mixed/Other", na.rm = TRUE) * 100,
    pct.white   = mean(race6 == "White", na.rm = TRUE) * 100,
    # partisanship
    pct.dem     = mean(pid3 == "Democratic", na.rm = TRUE) * 100,
    pct.rep     = mean(pid3 == "Republican", na.rm = TRUE) * 100,
    pct.ind     = mean(pid3 == "Pure Independent", na.rm = TRUE) * 100,
    # politics + SM use
    mean.pol.int = mean(pol.int, na.rm = TRUE),
    mean.inparty = mean(pre.therm.inparty, na.rm = TRUE),
    mean.outparty= mean(pre.therm.outparty, na.rm = TRUE),
    mean.sm.use  = mean(sm.count, na.rm = TRUE),
    # behaviors
    mean.posts = mean(n.posts, na.rm = TRUE),
    mean.comments = mean(n.comments, na.rm = TRUE),
    mean.tox.posts = mean(tox.posts, na.rm = T),
    mean.tox.comments = mean(tox.comments, na.rm = TRUE),
    mean.comfort = mean(comfort.sharing, na.rm = T),
    .groups = "drop"
  ) %>%
  pivot_longer(-tox_group, names_to = "variable", values_to = "value") %>%
  mutate(value = round(value, 1)) %>%
  pivot_wider(names_from = tox_group, values_from = value)

no_post_tab <- mydata %>% # table comparing non-posters
  filter(made.post == 0) %>%  # non-posters only
  summarize(
    # age,education, gender
    mean.age   = mean(age, na.rm = TRUE),
    pct.college = mean(educ.college, na.rm = TRUE) * 100,
    pct.female  = mean(female == "Woman", na.rm = TRUE) * 100,
    # race
    pct.asian   = mean(race6 == "Asian", na.rm = TRUE) * 100,
    pct.black   = mean(race6 == "Black", na.rm = TRUE) * 100,
    pct.hisp    = mean(race6 == "Hispanic", na.rm = TRUE) * 100,
    pct.mixed   = mean(race6 == "Mixed/Other", na.rm = TRUE) * 100,
    pct.white   = mean(race6 == "White", na.rm = TRUE) * 100,
    # partisanship
    pct.dem     = mean(pid3 == "Democratic", na.rm = TRUE) * 100,
    pct.rep     = mean(pid3 == "Republican", na.rm = TRUE) * 100,
    pct.ind     = mean(pid3 == "Pure Independent", na.rm = TRUE) * 100,
    # politics + SM use
    mean.pol.int = mean(pol.int, na.rm = TRUE),
    mean.inparty = mean(pre.therm.inparty, na.rm = TRUE),
    mean.outparty= mean(pre.therm.outparty, na.rm = TRUE),
    mean.sm.use  = mean(sm.count, na.rm = TRUE),
    # behaviors
    mean.posts = mean(n.posts, na.rm = TRUE),
    mean.comments = mean(n.comments, na.rm = TRUE),
    mean.tox.posts = mean(tox.posts, na.rm = T),
    mean.tox.comments = mean(tox.comments, na.rm = TRUE),
    mean.comfort = mean(comfort.sharing, na.rm = T)
  ) %>%
  pivot_longer(everything(),names_to = "variable", values_to = "None") %>%
  mutate(None = round(None, 1)) 

post_tab <- no_post_tab %>% # combine tables for posters and non-posters
    full_join(tox_post_tab, by = "variable") %>%
    mutate(across(-variable, ~ format(round(.x, 1), nsmall = 1)))


tox_comment_tab <- mydata %>% # table comparing toxic commenters
  filter(made.comment == 1 & !is.na(tox.comments)) %>%  # commenters only
  mutate(q = ntile(tox.comments, 4), # make comment toxicity quartiles
         tox_group = case_when(q == 1 ~ "Low", # lowest quartile comment toxicity
                               q %in% c(2, 3) ~ "Medium", # middle quartiles comment toxicity
                               q == 4 ~ "High"), # highest quartile comment toxicity
         tox_group = factor(tox_group, levels = c("Low","Medium","High"))) %>% # reorder
  group_by(tox_group) %>% # group by comment toxicity quartile
  summarize(
    # age,education, gender
    mean.age   = mean(age, na.rm = TRUE),
    pct.college = mean(educ.college, na.rm = TRUE) * 100,
    pct.female  = mean(female == "Woman", na.rm = TRUE) * 100,
    # race
    pct.asian   = mean(race6 == "Asian", na.rm = TRUE) * 100,
    pct.black   = mean(race6 == "Black", na.rm = TRUE) * 100,
    pct.hisp    = mean(race6 == "Hispanic", na.rm = TRUE) * 100,
    pct.mixed   = mean(race6 == "Mixed/Other", na.rm = TRUE) * 100,
    pct.white   = mean(race6 == "White", na.rm = TRUE) * 100,
    # partisanship
    pct.dem     = mean(pid3 == "Democratic", na.rm = TRUE) * 100,
    pct.rep     = mean(pid3 == "Republican", na.rm = TRUE) * 100,
    pct.ind     = mean(pid3 == "Pure Independent", na.rm = TRUE) * 100,
    # politics + SM use
    mean.pol.int = mean(pol.int, na.rm = TRUE),
    mean.inparty = mean(pre.therm.inparty, na.rm = TRUE),
    mean.outparty= mean(pre.therm.outparty, na.rm = TRUE),
    mean.sm.use  = mean(sm.count, na.rm = TRUE),
    # behaviors
    mean.posts = mean(n.posts, na.rm = TRUE),
    mean.comments = mean(n.comments, na.rm = TRUE),
    mean.tox.posts = mean(tox.posts, na.rm = T),
    mean.tox.comments = mean(tox.comments, na.rm = TRUE),
    mean.comfort = mean(comfort.sharing, na.rm = T),
    .groups = "drop"
  ) %>%
  pivot_longer(-tox_group, names_to = "variable", values_to = "value") %>%
  mutate(value = round(value, 1)) %>%
  pivot_wider(names_from = tox_group, values_from = value)

no_comment_tab <- mydata %>% # table for non-commenters
  filter(made.comment == 0) %>%  # non-commenters
  summarize(
    # age,education, gender
    mean.age   = mean(age, na.rm = TRUE),
    pct.college = mean(educ.college, na.rm = TRUE) * 100,
    pct.female  = mean(female == "Woman", na.rm = TRUE) * 100,
    # race
    pct.asian   = mean(race6 == "Asian", na.rm = TRUE) * 100,
    pct.black   = mean(race6 == "Black", na.rm = TRUE) * 100,
    pct.hisp    = mean(race6 == "Hispanic", na.rm = TRUE) * 100,
    pct.mixed   = mean(race6 == "Mixed/Other", na.rm = TRUE) * 100,
    pct.white   = mean(race6 == "White", na.rm = TRUE) * 100,
    # partisanship
    pct.dem     = mean(pid3 == "Democratic", na.rm = TRUE) * 100,
    pct.rep     = mean(pid3 == "Republican", na.rm = TRUE) * 100,
    pct.ind     = mean(pid3 == "Pure Independent", na.rm = TRUE) * 100,
    # politics + SM use
    mean.pol.int = mean(pol.int, na.rm = TRUE),
    mean.inparty = mean(pre.therm.inparty, na.rm = TRUE),
    mean.outparty= mean(pre.therm.outparty, na.rm = TRUE),
    mean.sm.use  = mean(sm.count, na.rm = TRUE),
    # behaviors
    mean.posts = mean(n.posts, na.rm = TRUE),
    mean.comments = mean(n.comments, na.rm = TRUE),
    mean.tox.posts = mean(tox.posts, na.rm = T),
    mean.tox.comments = mean(tox.comments, na.rm = TRUE),
    mean.comfort = mean(comfort.sharing, na.rm = T)
  ) %>%
  pivot_longer(everything(),names_to = "variable", values_to = "None") %>%
  mutate(None = round(None, 1)) 




cat("\n==============================\n")
cat("Table S7: Personal characteristics and online engagement by toxicity of posts and comments\n")
cat("==============================\n")
(comment_tab <- no_comment_tab %>% # combine tables for commenters and non-commenters
    full_join(tox_comment_tab, by = "variable") %>%
    mutate(across(-variable, ~ format(round(.x, 1), nsmall = 1))))
(post_tab)
table_S7 <- post_tab %>% # make latex table for S7
  full_join(comment_tab, by = "variable") %>%
  kable(format = "latex", booktabs = TRUE, linesep = "") 
writeLines(table_S7, "Tables/TableS7_toxicity_by_user_type.tex")

# First-difference models (Table S10) ----
mod.h4a.diff <- lm(scale(diff.therm.inparty) ~ condition.uncivil, data = mydata) # change in in-party therm
mod.h4a.diff.t <- summary(mod.h4a.diff)$coefficients[,3] # t values
mod.h4a.diff.p <- ifelse(mod.h4a.diff.t < 0, pt(mod.h4a.diff.t, mod.h4a.diff$df.residual,lower=T), 
                         pt(mod.h4a.diff.t, mod.h4a.diff$df.residual,lower=F)) # one-tailed p

mod.h4b.diff <- lm(scale(diff.therm.outparty) ~ condition.uncivil, data = mydata) # change in out-party therm
mod.h4b.diff.t <- summary(mod.h4b.diff)$coefficients[,3] # t values
mod.h4b.diff.p <- ifelse(mod.h4b.diff.t < 0, pt(mod.h4b.diff.t, mod.h4b.diff$df.residual,lower=T), 
                         pt(mod.h4b.diff.t, mod.h4b.diff$df.residual,lower=F)) # one-tailed p

mod.h5.diff <- lm(scale(diff.clim.polar) ~ condition.uncivil, data = mydata) # change in perceived polarization
mod.h5.diff.t <- summary(mod.h5.diff)$coefficients[,3] # t values
mod.h5.diff.p <- ifelse(mod.h5.diff.t < 0, pt(mod.h5.diff.t, mod.h5.diff$df.residual,lower=T), 
                        pt(mod.h5.diff.t, mod.h5.diff$df.residual,lower=F)) # one-tailed p

mod.h6.diff <- lm(scale(diff.trust) ~ condition.uncivil, data = mydata) # change in trust
mod.h6.diff.t <- summary(mod.h6.diff)$coefficients[,3] # t values
mod.h6.diff.p <- ifelse(mod.h6.diff.t < 0, pt(mod.h6.diff.t, mod.h6.diff$df.residual,lower=T), 
                        pt(mod.h6.diff.t, mod.h6.diff$df.residual,lower=F)) # one-tailed p

mod.h7.diff <- lm(scale(diff.democ) ~ condition.uncivil, data = mydata) # change in democratic satisfaction
mod.h7.diff.t <- summary(mod.h7.diff)$coefficients[,3] # t values
mod.h7.diff.p <- ifelse(mod.h7.diff.t < 0, pt(mod.h7.diff.t, mod.h7.diff$df.residual,lower=T), 
                        pt(mod.h7.diff.t, mod.h7.diff$df.residual,lower=F)) # one-tailed p


mods.diff <- list(mod.h4a.diff, mod.h4b.diff, mod.h5.diff, mod.h6.diff, mod.h7.diff)

cat("\n==============================\n")
cat("Table S10: ATE of assignment to uncivil newsfeed on within-subject change outcomes (H4-H7)\n")
cat("==============================\n")
stargazer(mods.diff, # make latex table S10
          title = "ATE of assignment to uncivil newsfeed on within-subject change outcomes (H4-H7)",
          dep.var.labels.include = T,
          dep.var.labels = c("In-party thermometer", "Out-party thermometer", "Perceived polarization",
                             "Trust", "Democratic satisfaction"),
          p = list(mod.h4a.diff.p, mod.h4b.diff.p,
                   mod.h5.diff.p,
                   mod.h6.diff.p,
                   mod.h7.diff.p),
          covariate.labels = c("Uncivil newsfeed", "(Constant)"),
          keep.stat = c("n", "adj.rsq"),
          star.char    = c("\\dagger", "*", "**"),
          report = ('vc*s'),
          digits = 3,
          align = T,
          no.space = T,
          out = "Tables/TableS10_ate_change.tex")


# Toxicity w/ random intercepts (Table S11) ----
tox.posts.df <- all.posts %>% # join survey data to all posts
  left_join(mydata, by = "participant_code") 

tox.posts.mod <- lmerTest::lmer(scale(tox) ~ condition.uncivil + (1|participant_code), data = tox.posts.df) # model with random intercepts
class(tox.posts.mod) <- "lmerMod"
tox.posts.mod.t <- summary(tox.posts.mod)$coefficients[,3] # t values
tox.posts.mod.p <- ifelse(tox.posts.mod.t < 0, pt(tox.posts.mod.t, df.residual(tox.posts.mod),lower=T), # one-sided p values
                          pt(tox.posts.mod.t, df.residual(tox.posts.mod),lower=F)) 

tox.comments.df <- all.comments %>% # join survey data to all comments
  left_join(mydata %>% select(condition.uncivil, participant_code, pid7), 
            by = "participant_code") %>%
  mutate(post.type = case_when(post.party == "Republican" & pid7 %in% c(5:7) ~ "In-party",
                               post.party == "Democratic" & pid7 %in% c(1:3) ~ "In-party",
                               post.party == "Republican" & pid7 %in% c(1:3) ~ "Out-party",
                               post.party == "Democratic" & pid7 %in% c(5:7) ~ "Out-party"))

tox.comments.mod <- lmerTest::lmer(scale(tox) ~ condition.uncivil + (1|participant_code), data = tox.comments.df) # model with random intercepts
class(tox.comments.mod) <- "lmerMod"
tox.comments.mod.t <- summary(tox.comments.mod)$coefficients[,3] # t values
tox.comments.mod.p <- ifelse(tox.comments.mod.t < 0, pt(tox.comments.mod.t, df.residual(tox.comments.mod),lower=T), # one-sided p values
                             pt(tox.comments.mod.t, df.residual(tox.comments.mod),lower=F)) 

tox.outparty.comments.mod <- lmerTest::lmer(scale(tox) ~ condition.uncivil + (1|participant_code), 
                                            subset = (post.type == "Out-party"),
                                            data = tox.comments.df) # model with random intercepts
class(tox.outparty.comments.mod) <- "lmerMod"
tox.outparty.comments.mod.t <- summary(tox.outparty.comments.mod)$coefficients[,3] # t values
tox.outparty.comments.mod.p <- ifelse(tox.outparty.comments.mod.t < 0, pt(tox.outparty.comments.mod.t, df.residual(tox.outparty.comments.mod),lower=T), # one-sided p values
                                      pt(tox.outparty.comments.mod.t, df.residual(tox.outparty.comments.mod),lower=F)) 

tox.mods <- list(tox.posts.mod, tox.comments.mod, tox.outparty.comments.mod)
cat("\n==============================\n")
cat("Table S11: ATE of assignment to uncivil newsfeed on post and comment toxicity with random intercepts\n")
cat("==============================\n")
stargazer(tox.mods, # make latex table for S11
          title = "ATE of assignment to uncivil newsfeed on post and comment toxicity with random intercepts",
          p = list(tox.posts.mod.p, tox.comments.mod.p, tox.outparty.comments.mod.p),
          digits = 3,
          star.char    = c("\\dagger", "*", "**"),
          no.space = TRUE,  # No extra space between columns
          covariate.labels = c("Uncivil condition", "(Constant)"),
          align = T,
          keep.stat = c("n","aic"),
          out = "Tables/TableS11_ate_toxicity_random_intercepts.tex")

# NLP measures of posts and comments, desc table (Table S16) ----
text_measures_posts <- all.posts %>%  # extract measures for posts
  left_join(ids, by = "participant_code") %>%
  select(participant_code, condition, text, tox, vader, identity.attack, insult, profanity, politeness) %>%
  mutate(text.type = "Posts") 
text_measures_comments <- all.comments %>% # extract measures for comments
  left_join(ids, by = "participant_code") %>%
  select(participant_code, condition, text, tox, vader, identity.attack, insult, profanity, politeness) %>%
  mutate(text.type = "Comments") 
text_measures <- rbind(text_measures_posts, text_measures_comments) # combine

text_measures_tab <- text_measures %>%
  group_by(text.type, condition) %>%
  summarize(mean.tox = mean(tox, na.rm = T),
            mean.identity.attack = mean(identity.attack, na.rm = T),
            mean.insult = mean(insult, na.rm = T),
            mean.profanity = mean(profanity, na.rm = T),
            mean.politeness = -1*mean(politeness, na.rm = T),
            mean.vader = -1*mean(vader, na.rm = T),
            .groups = "drop") %>%
  pivot_longer(cols = starts_with("mean."),
               names_to = "measure",
               values_to = "value") %>%
  pivot_wider(names_from = condition,
              values_from = value) %>%
  relocate(uncivil, .before = civil) %>%
  mutate(diff = uncivil - civil)

text_measures_tab$p <- c(as.numeric(t.test(text_measures_comments$tox ~ text_measures_comments$condition)$p.value),
                         as.numeric(t.test(text_measures_comments$identity.attack ~ text_measures_comments$condition)$p.value),
                         as.numeric(t.test(text_measures_comments$insult ~ text_measures_comments$condition)$p.value),
                         as.numeric(t.test(text_measures_comments$profanity ~ text_measures_comments$condition)$p.value),
                         as.numeric(t.test(-1*text_measures_comments$politeness ~ text_measures_comments$condition)$p.value),
                         as.numeric(t.test(-1*text_measures_comments$vader ~ text_measures_comments$condition)$p.value),
                         as.numeric(t.test(text_measures_posts$tox ~ text_measures_posts$condition)$p.value),
                         as.numeric(t.test(text_measures_posts$identity.attack ~ text_measures_posts$condition)$p.value),
                         as.numeric(t.test(text_measures_posts$insult ~ text_measures_posts$condition)$p.value),
                         as.numeric(t.test(text_measures_posts$profanity ~ text_measures_posts$condition)$p.value),
                         as.numeric(t.test(-1*text_measures_posts$politeness ~ text_measures_posts$condition)$p.value),
                         as.numeric(t.test(-1*text_measures_posts$vader ~ text_measures_posts$condition)$p.value))
text_measures_tab <- text_measures_tab %>%
  mutate(uncivil = format(round(uncivil, 3), nsmall = 2),
         civil = format(round(civil, 3), nsmall = 2),
         diff = format(round(diff, 3), nsmall = 2),
         p = format(round(p, 3), nsmall = 2))
colnames(text_measures_tab) <- c("Content Type","Measure","Uncivil","Civil","Difference","P-value")

cat("\n==============================\n")
cat("Table S16: Comparing features of user-generated content by condition\n")
cat("==============================\n")
print(text_measures_tab)
print(xtable(text_measures_tab), include.rownames = F, 
      file = "Tables/TableS16_means_diffs_convo_features.tex") # make latex table for S16

# NLP measures of posts and comments, intercepts models (Tables S14-15) ----
mod_text_measures_posts_politeness <- lmerTest::lmer(-1*scale(politeness) ~ condition + (1|participant_code), data = text_measures_posts) 
class(mod_text_measures_posts_politeness) <- "lmerMod"

mod_text_measures_posts_vader <- lmerTest::lmer(-1*scale(vader) ~ condition + (1|participant_code), data = text_measures_posts) 
class(mod_text_measures_posts_vader) <- "lmerMod"

mod_text_measures_posts_identity.attack <- lmerTest::lmer(scale(identity.attack) ~ condition + (1|participant_code), data = text_measures_posts) 
class(mod_text_measures_posts_identity.attack) <- "lmerMod"

mod_text_measures_posts_insult <- lmerTest::lmer(scale(insult) ~ condition + (1|participant_code), data = text_measures_posts) 
class(mod_text_measures_posts_insult) <- "lmerMod"

mod_text_measures_posts_profanity <- lmerTest::lmer(scale(profanity) ~ condition + (1|participant_code), data = text_measures_posts) 
class(mod_text_measures_posts_profanity) <- "lmerMod"

mods1 <- list(mod_text_measures_posts_politeness, 
              mod_text_measures_posts_vader, 
              mod_text_measures_posts_identity.attack, 
              mod_text_measures_posts_insult, 
              mod_text_measures_posts_profanity)

cat("\n==============================\n")
cat("Table S14: ATE of assignment to uncivil newsfeed on conversational features of posts with random intercepts\n")
cat("==============================\n")
stargazer(mods1, # make latex table for S14
          title = "ATE of assignment to uncivil newsfeed on conversational features of posts with random intercepts",
          dep.var.labels.include = T,
          dep.var.labels = c("Politeness","VADER","Identity attack","Insult","profanity"),
          covariate.labels = c("Uncivil newsfeed", "(Constant)"),
          keep.stat = c("n", "aic"),
          star.char    = c("\\dagger", "*", "**"),
          digits = 3,
          align = T,
          no.space = T,
          out = "Tables/TableS14_ate_post_convo_features_random_intercepts.tex")

mod_text_measures_comments_politeness <- lmerTest::lmer(-1*scale(politeness) ~ condition + (1|participant_code), data = text_measures_comments) 
class(mod_text_measures_comments_politeness) <- "lmerMod"

mod_text_measures_comments_vader <- lmerTest::lmer(-1*scale(vader) ~ condition + (1|participant_code), data = text_measures_comments) 
class(mod_text_measures_comments_vader) <- "lmerMod"

mod_text_measures_comments_identity.attack <- lmerTest::lmer(scale(identity.attack) ~ condition + (1|participant_code), data = text_measures_comments) 
class(mod_text_measures_comments_identity.attack) <- "lmerMod"

mod_text_measures_comments_insult <- lmerTest::lmer(scale(insult) ~ condition + (1|participant_code), data = text_measures_comments) 
class(mod_text_measures_comments_insult) <- "lmerMod"

mod_text_measures_comments_profanity <- lmerTest::lmer(scale(profanity) ~ condition + (1|participant_code), data = text_measures_comments) 
class(mod_text_measures_comments_profanity) <- "lmerMod"

mods1 <- list(mod_text_measures_comments_politeness, 
              mod_text_measures_comments_vader, 
              mod_text_measures_comments_identity.attack, 
              mod_text_measures_comments_insult, 
              mod_text_measures_comments_profanity)

cat("\n==============================\n")
cat("Table S15: ATE of assignment to uncivil newsfeed on conversational features of comments with random intercepts\n")
cat("==============================\n")
stargazer(mods1, # make latex table for S15
          title = "ATE of assignment to uncivil newsfeed on conversational features of comments with random intercepts",
          dep.var.labels.include = T,
          dep.var.labels = c("Politeness","VADER","Identity attack","Insult","Profanity"),
          covariate.labels = c("Uncivil newsfeed", "(Constant)"),
          keep.stat = c("n", "aic"),
          star.char    = c("\\dagger", "*", "**"),
          digits = 3,
          align = T,
          no.space = T,
          out = "Tables/TableS15_ate_comment_convo_features_random_intercepts.tex")

# Commenting and toxicity by post type, political vs. nonpolitical (Table S17) ----
mod.commented.inparty <- lm(commented.inparty ~ condition.uncivil, data = mydata)
mod.commented.outparty <- lm(commented.outparty ~ condition.uncivil, data = mydata)
mod.commented.nonpolitical <- lm(commented.nonpolitical ~ condition.uncivil, data = mydata)

mydata.comment.tox <- all.comments %>%
  left_join(ids, by = "participant_code") %>%
  left_join(pids, by = "participant_code") %>%
  mutate(comment.inparty = case_when(post.party == "Democratic" & pid7 %in% c(1:3) ~ 1,
                                     post.party == "Republican" & pid7 %in% c(5:7) ~ 1,
                                     post.party == "Republican" & pid7 %in% c(1:3) ~ 0,
                                     post.party == "Democratic" & pid7 %in% c(5:7) ~ 0),
         comment.outparty = case_when(post.party == "Republican" & pid7 %in% c(1:3) ~ 1,
                                      post.party == "Democratic" & pid7 %in% c(5:7) ~ 1,
                                      post.party == "Democratic" & pid7 %in% c(1:3) ~ 0,
                                      post.party == "Republican" & pid7 %in% c(5:7) ~ 0),
         comment.nonpolitical = case_when(post.type %in% c("Bonus","Filler") ~ 1),
         condition.uncivil = case_when(condition == "uncivil" ~ 1,
                                       condition == "civil" ~ 0)) 

mod.comment.tox.inparty <- lmerTest::lmer(tox ~ condition.uncivil + (1|participant_code), data = mydata.comment.tox, subset = (comment.inparty == 1))
mod.comment.tox.outparty <- lmerTest::lmer(tox ~ condition.uncivil + (1|participant_code), data = mydata.comment.tox, subset = (comment.outparty == 1))
mod.comment.tox.nonpolitical <- lmerTest::lmer(tox ~ condition.uncivil + (1|participant_code), data = mydata.comment.tox, subset = (comment.nonpolitical == 1))

tab_s17_pvals <- sapply(list(mod.commented.inparty, mod.commented.outparty, mod.commented.nonpolitical,
                     mod.comment.tox.inparty, mod.comment.tox.outparty, mod.comment.tox.nonpolitical), function(m) 
  summary(m)$coefficients["condition.uncivil", "Pr(>|t|)"])

class(mod.comment.tox.inparty) <- "lmerMod"
class(mod.comment.tox.outparty) <- "lmerMod"
class(mod.comment.tox.nonpolitical) <- "lmerMod"

mods <- list(mod.commented.inparty, mod.commented.outparty, mod.commented.nonpolitical,
             mod.comment.tox.inparty, mod.comment.tox.outparty, mod.comment.tox.nonpolitical)

cat("\n==============================\n")
cat("Table S17: ATE of assignment to uncivil newsfeed on commenting behavior\n")
cat("==============================\n")
stargazer(mods, # make latex table S17
          title = "ATE of assignment to uncivil newsfeed on commenting behavior",
          dep.var.labels = c("In-party","Out-party","Non-political", "In-party","Out-party","Non-political"),
          covariate.labels = c("Uncivil newsfeed","(Constant)"),
          keep.stat = c("n","adj.rsq","AIC"),
          align = T,
          no.space = T,
          star.char    = c("\\dagger", "*", "**"),
          out = "Tables/TableS17_comment_behavior_by_post_type.tex")

tab_s17_pvals

# Pretesting: NLP (Figures S2) ----

cat("\n==============================\n")
cat("Toxicity of Pretested content\n")
cat("==============================\n")
researcher.posts %>% filter(method == "measure.toxicity") %>% group_by(content.type) %>% summarize(mean = mean(measure)) # mean toxicity of pretested research-generated posts
researcher.comments %>% filter(method == "measure.toxicity") %>% group_by(content.type) %>% summarize(mean = mean(measure)) # mean toxicity of pretested researcher-generated comments
bot.comments %>% filter(method == "measure.toxicity") %>% group_by(content.type) %>% summarize(mean = mean(measure)) # mean toxicity of pretested gpt-comments

plot.posts <- researcher.posts %>%
  filter(content.type != "Filler") %>%
  group_by(method) %>%
  mutate(measure = scale(measure)) %>%
  group_by(content.type, method, group, party) %>%
  summarize(mean = as.numeric(mean_se(measure)[1]),
            lwr = as.numeric(mean_se(measure, mult = 1.645)[2]),
            upr = as.numeric(mean_se(measure, mult = 1.645)[3]),
            text.type = "Posts")

plot.comments <- researcher.comments %>% 
  group_by(method) %>%
  mutate(measure = scale(measure)) %>%
  group_by(content.type, method, group, party) %>%
  summarize(mean = as.numeric(mean_se(measure)[1]),
            lwr = as.numeric(mean_se(measure, mult = 1.645)[2]),
            upr = as.numeric(mean_se(measure, mult = 1.645)[3]),
            text.type = "Comments")

plot.gpt <- bot.comments %>% 
  group_by(method) %>%
  mutate(measure = scale(measure)) %>%
  group_by(content.type, method, group, bot.party) %>%
  summarize(mean = as.numeric(mean_se(measure)[1]),
            lwr = as.numeric(mean_se(measure, mult = 1.645)[2]),
            upr = as.numeric(mean_se(measure, mult = 1.645)[3])) %>%
  mutate(text.type = "GPT comments",
         content.type = case_when(content.type == "response.civil" ~ "Civil",
                                  content.type == "response.uncivil" ~ "Uncivil")) %>%
  rename(party = bot.party)

figure_S2 <- rbind(plot.posts, plot.comments, plot.gpt)

cat("\n==============================\n")
cat("Figure S2: Measuring text features of research-generated posts, comments, and GPT comments\n")
cat("==============================\n")
print(figure_S2)

figure_S2 %>%
  mutate(text.type = factor(text.type, levels = c("Posts","Comments","GPT comments"))) %>%
  mutate(group = factor(group, levels = c("Toxicity","Civility","Sentiment"))) %>%
  ggplot(aes(x = method, y = mean, color = party, group = content.type, shape = content.type)) +
  geom_point(aes(shape = content.type, group = party), position = position_dodge(width = 0.4), size = 3.5) +
  geom_errorbar(aes(ymin = lwr, ymax = upr, group = party), position = position_dodge(width = 0.4), width = 0) +
  geom_hline(yintercept = 0, color = "red", linetype = "dashed") +
  facet_grid(cols = vars(group), rows = vars(text.type), scales = "free", space = "free_x") +
  scale_x_discrete("Text Tool", labels = c("measure.toxicity" = "Perspectives",
                                           "measure.gpt.civility" = "GPT-4",
                                           "measure.afinn" = "AFINN",
                                           "measure.bing" = "Bing",
                                           "measure.google" = "Google Cloud",
                                           "measure.gpt" = "GPT-4",
                                           "measure.nrc" = "NRC",
                                           "measure.syuzhet" = "Syuzhet")) +
  scale_y_continuous("Value\n(standardized)") +
  scale_color_manual("Party", values = c("#123260","#bb301b"), breaks = c("Democratic","Republican")) +
  scale_shape_manual("Text type", values = c(16,17)) +
  theme_bw() +
  theme(axis.text.x = element_text(size = 16, angle = 90, hjust = 1, margin = margin(t = 10)),
        axis.text.y = element_text(size = 16, margin = margin(r = 10)),
        axis.title.x = element_text(margin = margin(20,0,0,0,"pt")),
        axis.title.y = element_text(margin = margin(0,10,0,0,"pt")),
        axis.text = element_text(color = "black"),
        axis.ticks = element_blank(),
        axis.title = element_text(size = 16),
        legend.title = element_text(size = 14),
        legend.text = element_text(size = 12),
        legend.position = "bottom",
        panel.border = element_rect(color = "black", linewidth = 1, fill = NA),
        panel.grid = element_blank(),
        plot.background = element_rect(color = "white"),
        strip.text = element_text(size = 14, color = "black"),
        strip.background = element_blank(),
        plot.margin = margin(0.5,0.5,0.15,0.25, unit = "in"))
ggsave(height = 8, width = 8.5, "Figures/FigureS2_pretest_nlp.png")

# Pretesting: Connect (Figures S3-S4) ----

pt.posts.means <- pt.posts %>%
  group_by(text.type, text.party) %>%
  drop_na(rating) %>%
  summarize(mean = mean(rating, na.rm = T),
            sd = sd(rating, na.rm = T),
            n = n(),
            se = sd/sqrt(n()),
            lwr = mean - 1.96*se,
            upr = mean + 1.96*se,
            content = "Posts")


pt.comments.means <- pt.comments %>%
  group_by(text.type, text.party) %>%
  drop_na(rating) %>%
  summarize(mean = mean(rating, na.rm = T),
            sd = sd(rating, na.rm = T),
            n = n(),
            se = sd/sqrt(n()),
            lwr = mean - 1.96*se,
            upr = mean + 1.96*se,
            content = "Comments")

pt.gpt.means <- pt.gpt %>%
  rename(text.party = bot.party, text.type = bot.type) %>%
  group_by(text.type, text.party) %>%
  drop_na(rating) %>%
  summarize(mean = mean(rating, na.rm = T),
            sd = sd(rating, na.rm = T),
            n = n(),
            se = sd/sqrt(n()),
            lwr = mean - 1.96*se,
            upr = mean + 1.96*se,
            content = "GPT comments")

figure_S3 <- rbind(pt.posts.means, pt.comments.means, pt.gpt.means)
figure_S3 <- figure_S3 %>%
  mutate(content = factor(content, levels = rev(c("Posts","Comments","GPT comments"))),
         text.party = case_when(text.party %in% c("Democratic","Democrat") ~ "(a) Democrat",
                                text.party == "Republican" ~ "(b) Republican",
                                TRUE ~ text.party))

cat("\n==============================\n")
cat("Figure S3: Mean civility ratings of newsfeed content by content type and partisanship of contributing profile (Connect pretest)\n")
cat("==============================\n")
print(figure_S3)
figure_S3 %>%
  ggplot(aes(x = text.type, y = mean, color = content)) +
  geom_point(size = 3.5, position = position_dodge(width = 0.2)) +
  facet_wrap(~text.party, ncol = 1, scales = "free_y") +
  geom_errorbar(aes(ymin = lwr, ymax = upr), position = position_dodge(width = 0.2), width = 0) +
  scale_x_discrete("Text type") +
  scale_y_continuous("Civility (7-point)", limits = c(1,7), breaks = seq(1,7,1)) +
  scale_color_manual("", values = c("#F8766D","#00BA38","#619CFF"), 
                     breaks = c("Posts","Comments","GPT comments")) +
  theme_bw() +
  theme(axis.text.x = element_text(size = 16, hjust = 1, margin = margin(t = 10)),
        axis.text.y = element_text(size = 16, margin = margin(r= 10)),
        axis.text = element_text(color = "black"),
        axis.title.x = element_text(margin = margin(20,0,0,0,"pt")),
        axis.title.y = element_text(margin = margin(0,10,0,0,"pt")),
        axis.ticks = element_blank(),
        axis.title = element_text(size = 16),
        legend.title = element_text(size = 14),
        legend.text = element_text(size = 12),
        legend.position = "bottom",
        panel.border = element_rect(color = "black", linewidth = 1, fill = NA),
        panel.grid = element_blank(),
        plot.background = element_rect(color = "white"),
        strip.text = element_text(size = 14),
        strip.background = element_blank(),
        plot.margin = margin(0.1,0.75,0.15,0.15, unit = "in")) +
  coord_flip()
ggsave(height = 5, width = 6, "Figures/FigureS3_pretest_human_mean_civility.png")


pt.posts.pair.correct <- pt.posts.pair %>%
  rename(text.party = party) %>%
  drop_na(correct3) %>%
  group_by(text.party) %>%
  mutate(total.n = n()) %>%
  group_by(text.party, correct3, total.n) %>%
  summarize(group.n = n()) %>%
  mutate(pct = group.n/total.n,
         content = "Posts")

pt.comments.pair.correct <- pt.comments.pair %>%
  drop_na(correct3) %>%
  group_by(text.party) %>%
  mutate(total.n = n()) %>%
  group_by(text.party, correct3, total.n) %>%
  summarize(group.n = n()) %>%
  mutate(pct = group.n/total.n,
         content = "Comments")

pt.gpt.pair.correct <- pt.gpt.pair %>%
  rename(text.party = bot.party) %>%
  drop_na(correct3) %>%
  group_by(text.party) %>%
  mutate(total.n = n()) %>%
  group_by(text.party, correct3, total.n) %>%
  summarize(group.n = n()) %>%
  mutate(pct = group.n/total.n,
         content = "GPT comments")

figure_S4 <- rbind(pt.posts.pair.correct, pt.comments.pair.correct, pt.gpt.pair.correct)
figure_S4 <- figure_S4 %>%
  mutate(text.party = case_when(text.party %in% c("Democratic","Democrat") ~ "(a) Democrat",
                                text.party == "Republican" ~ "(b) Republican",
                                TRUE ~ text.party),
         content = factor(content, levels = c("Posts","Comments","GPT comments")))

cat("\n==============================\n")
cat("Figure S4: Share correctly and incorrectly identifying uncivil content by type and partisanship of contributing profile (Connect pretest)\n")
cat("==============================\n")
print(figure_S4)

figure_S4 %>%
  ggplot(aes(x = correct3, y = pct, fill = content)) +
  geom_bar(stat = "identity", position = "dodge", width = 0.5) +
  facet_wrap(~text.party) +
  scale_x_discrete("Text type") +
  scale_y_continuous("Proportion", limits = c(0,1), breaks = seq(0,1,0.25)) +
  scale_fill_manual("", values = c("#F8766D","#00BA38","#619CFF"), 
                    breaks = c("Posts","Comments","GPT comments")) +
  theme_bw() +
  theme(axis.text.x = element_text(size = 16),
        axis.text.y = element_text(size = 16),
        axis.text = element_text(color = "black"),
        axis.title.x = element_text(margin = margin(20,0,0,0,"pt")),
        axis.title.y = element_text(margin = margin(0,10,0,0,"pt")),
        axis.ticks = element_blank(),
        axis.title = element_text(size = 16),
        legend.title = element_text(size = 14),
        legend.text = element_text(size = 12),
        legend.position = "bottom",
        panel.border = element_rect(color = "black", linewidth = 1, fill = NA),
        panel.grid = element_blank(),
        plot.background = element_rect(color = "white"),
        panel.grid.minor = element_blank(),
        strip.text = element_text(size = 14),
        strip.background = element_blank(),
        plot.margin = margin(0.5,0.25,0,0.15, unit = "in"))
ggsave(height = 5, width = 7, "Figures/FigureS4_pretest_human_identifying_civility.png")

# Bot ID plot (Figure S5) ----
cat("\n==============================\n")
cat("Proportion of respondents report no bot activity or only 1-2 bots\n")
cat("==============================\n")
prop.table(table(mydata$bots %in% c(1,2)))

cat("\n==============================\n")
cat("Figure S5: Responses to bot encounters question on post-treatment survey\n")
cat("==============================\n")
figure_S5 <- mydata %>% # plot figure S5
  select(condition, bots) %>%
  drop_na() %>%
  group_by(condition, bots) %>% 
  summarize(n = n()) %>%
  group_by(condition) %>%
  mutate(total = sum(n), # calculate percent
         pct = round((n/total)*100,1),
         label = paste0(pct,"%","\n(N=", n,")"))

print(figure_S5)
ggplot(figure_S5, aes(x = bots, y = n, fill = condition)) +
  geom_bar(stat = "identity", position = position_dodge(width=0.9)) +
  geom_text(aes(y = n, label = label), vjust = -0.5, position = position_dodge(width = 0.9), size = 4, color = "black") +
  scale_x_continuous("Encounter any bots?",
                     breaks = seq(1,4,1),
                     labels = c("No bots","1-2 bots","Several bots","Many bots")) +
  scale_y_continuous("Count", limits = c(0,300)) +
  scale_fill_discrete("", labels = c("Civil","Uncivil")) +
  theme_bw() +
  theme(
    axis.text = element_text(color = "black"),
    axis.title = element_text(size = 16),
    axis.title.x = element_text(margin = margin(t = 10)),
    axis.title.y = element_text(margin = margin(r = 10)),
    axis.text.x = element_text(size = 16),
    axis.text.y = element_text(size = 16),
    strip.background = element_blank(),
    strip.placement = "outside",  # Place facet labels outside the plot area
    strip.text = element_text(size = 14),
    legend.title = element_text(size = 14),
    legend.text = element_text(size = 12),
    legend.position = "bottom",
    panel.border = element_rect(color = "black", fill = NA, linewidth = 1),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    plot.margin = margin(0.25, 0.5, 0.25, 0.1, "in"),
    axis.ticks = element_blank()
  )
ggsave(height = 5.5, width = 7.25, "Figures/FigureS5_bot_id_by_condition.png")

# Descriptive statistics: bot comments by user content (Figure S6) ----
figure_S6 <- mydata %>% # make figure S6
  mutate(n.content = case_when(n.content >= 15 ~ 15, TRUE ~ n.content)) %>% # if more than 15 content created, assign as 15
  group_by(n.content) %>% # group by number of content created
  summarize(mean = mean(n.bot.comments, na.rm = T)) # calculate mean number of bot comments

cat("\n==============================\n")
cat("Figure S6: Distribution of synthetic user comments by number of participant posts/comments\n")
cat("==============================\n")
print(figure_S6)
ggplot(figure_S6, aes(x = n.content, y = mean)) + # plot
  geom_bar(stat = "identity") +
  scale_x_continuous("Number of participant comments/posts",
                     breaks = seq(0,15,5),
                     labels = c("0","5","10","15")) +
  scale_y_continuous("Number of synthetic user comments", 
                     breaks = seq(0,30,5),
                     limits = c(0,30)) +
  theme_bw() +
  theme(
    axis.title.x = element_text(margin = margin(t = 10)),
    axis.title.y = element_text(margin = margin(r = 10)),
    axis.text = element_text(color = "black"),
    axis.title = element_text(size = 16),
    axis.text.x = element_text(size = 16),
    axis.text.y = element_text(size = 16),
    strip.background = element_blank(),
    strip.placement = "outside",  # Place facet labels outside the plot area
    strip.text = element_text(size = 14),
    legend.title = element_text(size = 14),
    legend.text = element_text(size = 12),
    panel.border = element_rect(color = "black", fill = NA, linewidth = 1),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    plot.margin = margin(0.25, 0.5, 0.25, 0.25, "in"),
    axis.ticks = element_blank()
  )
ggsave(height = 5.5, width = 7.25, "Figures/FigureS6_bot_comments_by_user_content.png")

# Distribution of posts and comments (Figure S7) ----

figure_S7a <- mydata %>% # plot Figure 7a
  mutate(n.posts = case_when(n.posts >= 4 ~ 4, TRUE ~ n.posts)) %>% # cap at 4 posts
  group_by(condition, n.posts) %>%
  summarize(n = n()) %>%
  group_by(condition) %>%
  mutate(total = sum(n),
         prop = n/total)


figure_S7b <- mydata %>% # plot Figure 7b
  mutate(n.comments = case_when(n.comments >= 10 ~ 10, TRUE ~ n.comments)) %>% # cap at 10 comments
  group_by(condition, n.comments) %>%
  summarize(n = n()) %>%
  group_by(condition) %>%
  mutate(total = sum(n),
         prop = n/total) 

cat("\n==============================\n")
cat("Figure S7: Distribution of post and comment counts by condition\n")
cat("==============================\n")
print(figure_S7a)
print(figure_S7b)
ggplot(figure_S7a, aes(x = n.posts, y = prop, fill = condition)) +
  geom_bar(stat = "identity", position = position_dodge(width = 0.9)) +
  labs(x = "Number of posts", y = "Proportion of Respondents") +
  scale_fill_discrete("Condition", labels = c("Civil","Uncivil")) +
  scale_x_continuous(limits = c(-0.5,4.5), breaks = seq(0,4,1), labels = c("0","1","2","3","4+")) +
  scale_y_continuous(limits = c(0,0.6), breaks = seq(0,0.6,0.1)) +
  theme_bw() +
  theme(
    axis.text = element_text(color = "black"),
    axis.title = element_text(size = 16),
    axis.title.x = element_text(margin = margin(t = 10)),
    axis.title.y = element_text(margin = margin(r = 10)),
    axis.text.x = element_text(size = 16),
    axis.text.y = element_text(size = 16),
    strip.background = element_blank(),
    strip.placement = "outside",  # Place facet labels outside the plot area
    strip.text = element_text(size = 14),
    legend.title = element_text(size = 14),
    legend.text = element_text(size = 12),
    legend.position = "bottom",
    panel.border = element_rect(color = "black", fill = NA, linewidth = 1),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    plot.margin = margin(0.25, 0.5, 0.25, 0.25, "in"),
    axis.ticks = element_blank()
  )
ggsave(height = 5, width = 7.25, "Figures/FigureS7a_post_distribution_by_condition.png")

ggplot(figure_S7b, aes(x = n.comments, y = prop, fill = condition)) +
  geom_bar(stat = "identity", position = position_dodge(width = 0.7), width = 0.7) +
  labs(x = "Number of Comments", y = "Proportion of Respondents") +
  scale_fill_discrete("Condition", labels = c("Civil","Uncivil")) +
  scale_x_continuous(limits = c(-0.5,10.5), breaks = seq(0,10,1), labels = c("0","1","2","3","4","5",
                                                                             "6","7","8","9","10+")) +
  theme_bw() +
  theme(
    axis.title = element_text(size = 16),
    axis.title.x = element_text(margin = margin(t = 10)),
    axis.title.y = element_text(margin = margin(r = 10)),
    axis.text.x = element_text(size = 16),
    axis.text.y = element_text(size = 16),
    axis.text = element_text(color = "black"),
    strip.background = element_blank(),
    strip.placement = "outside",  # Place facet labels outside the plot area
    strip.text = element_text(size = 14),
    legend.title = element_text(size = 14),
    legend.text = element_text(size = 12),
    legend.position = "bottom",
    panel.border = element_rect(color = "black", fill = NA, linewidth = 1),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    plot.margin = margin(0.25, 0.5, 0.25, 0.25, "in"),
    axis.ticks = element_blank()
  )
ggsave(height = 5, width = 7.25, "Figures/FigureS7b_comment_distribution_by_condition.png")

# Eligibility & attrition (values for Figure S8) ----
total_n <- df.intake %>% # 1652 in intake w/ anonymous prolific id
  nrow() 

consent_drop <- df.intake %>% # 2 in intake w/ anonymous prolific id who did not consent
  filter(as.numeric(consent) == 0) %>% 
  nrow()

jive_drop <- df.intake %>% # 39 in intake w/ anonymous prolific id who uses ChatterChain 'some' or 'alot' 
  filter(platforms.cc %in% c(2,3)) %>% # 'A lot' or 'some' use of ChatterChain (fake platform)
  nrow()

device_drop <- df.intake %>% # 1 in intake w/ anonymous prolific id who had device type 'something else' (i.e., non iPhone or Android)
  filter(device == 5) %>% # device = "something else"
  nrow()

download_drop <- df.intake %>% # 7 in intake w/ anonymous prolific id who were unwilling to download app
  filter((download.apple == 2) | (download.android == 2)) %>% # unwilling to download iphone or android app
  nrow()

image_drop <- df.intake %>% # 0 in intake w/ anonymous prolific id who got attention check wrong
  filter(as.numeric(screen.image) != 3.0) %>%
  filter(!is.na(screen.image)) %>%
  nrow()

age_drop <- df.intake %>% # 1 in intake w/ anonymous prolific id whose age < 18 
  filter(age < 18 & !is.na(age)) %>%
  nrow()

us_drop <- df.intake %>% # 0 in intake w/ anonymous prolific id who did not reside in US
  filter(state == 0) %>% # state = "I do not reside in the United States'
  nrow()

intake_completes_n <- df.intake %>% # 1593 in intake w/ anonymous prolific id and assigned participant_code (reached the end of intake)
  drop_na(anon_id, participant_code) %>%
  nrow()

intake_elig_attrit <- consent_drop + device_drop + download_drop + age_drop + us_drop # 11 attrit for eligibility criteria
intake_attent_attrit <- jive_drop + image_drop # 39 attrit for attention checks

intake_forced_attrit <- intake_elig_attrit + intake_attent_attrit # 50 forced attrition
intake_other_attrit <- total_n - intake_completes_n - intake_forced_attrit # 9 other attrition (not forced out, but didnt complete)
intake_total_attrit <- intake_forced_attrit + intake_other_attrit # 59 who started but did not complete intake

pretreat_n <- df.pre %>% # 1501 w/ anonymous prolific id and data in pre+intake
  left_join(df.intake, by = "participant_code") %>%
  drop_na(anon_id) %>%
  nrow()

intake_pretreat_attrit <- intake_completes_n - pretreat_n # 92 lost between completing intake and appearing in pre (i.e., during download)

pretreat_completes_n <- df.pre %>% # 1491 w/ anonymous prolific id, data in pre+intake and reached final page of pre
  left_join(df.intake, by = "participant_code") %>%
  drop_na(anon_id) %>%
  filter(as.numeric(pre.progress) == 100) %>% # finished pre-treatment
  nrow()

posttreat_n <- df.post %>% # 1467 w/ anonymous prolific id, data in pre+intake+post
  left_join(df.pre, by = "participant_code") %>%
  left_join(df.intake, by = "participant_code") %>%
  filter(as.numeric(pre.progress) == 100) %>%
  drop_na(anon_id) %>%
  nrow()

pre_post_attrit <- pretreat_completes_n - posttreat_n # 24 lost in newsfeed  (between completing pretreatment and appearing in post)

complete_participants <- df.post %>% # list of participant_codes for those with complete data
  left_join(df.intake, by = "participant_code") %>%
  left_join(df.pre, by = "participant_code") %>%
  left_join(prolific, by = "anon_id") %>%
  drop_na(anon_id, completion.code) %>%
  filter(as.numeric(pre.progress) == 100) %>%
  filter(as.numeric(post.progress) >= 93) %>%
  pull(participant_code)

n_completes <- length(complete_participants) # 1,462 completes (1 person gets dropped in final df due to data quality)

completion_rate <- length(complete_participants)/intake_completes_n # 91.8%

n_conditions <- mydata %>% # 734 in civil, 727 in uncivil
  group_by(condition.uncivil) %>%
  summarize(n.per.condition = n()) 


cat("\n==============================\n")
cat("INTAKE AND ATTRITION SUMMARY\n")
cat("==============================\n")

cat(sprintf("Total intake respondents: %d\n", total_n))
cat(sprintf("Did not consent: %d\n", consent_drop))
cat(sprintf("ChatterChain users removed (attention): %d\n", jive_drop))
cat(sprintf("Unsupported device removed: %d\n", device_drop))
cat(sprintf("Unwilling to download app: %d\n", download_drop))
cat(sprintf("Failed image attention check: %d\n", image_drop))
cat(sprintf("Under age 18: %d\n", age_drop))
cat(sprintf("Not residing in U.S.: %d\n", us_drop))

cat("\n")

cat(sprintf("Completed intake: %d\n", intake_completes_n))
cat(sprintf("Eligibility attrition: %d\n", intake_elig_attrit))
cat(sprintf("Attention-based attrition: %d\n", intake_attent_attrit))
cat(sprintf("Total forced attrition: %d\n", intake_forced_attrit))
cat(sprintf("Other intake attrition: %d\n", intake_other_attrit))
cat(sprintf("Total intake attrition: %d\n", intake_total_attrit))

cat("\n")

cat(sprintf("Entered pre-treatment: %d\n", pretreat_n))
cat(sprintf("Attrition between intake and pre-treatment: %d\n", intake_pretreat_attrit))
cat(sprintf("Completed pre-treatment: %d\n", pretreat_completes_n))
cat(sprintf("Entered post-treatment: %d\n", posttreat_n))
cat(sprintf("Attrition during newsfeed: %d\n", pre_post_attrit))

cat("\n")

cat(sprintf("Final complete participants: %d\n", n_completes))
cat(sprintf("Overall completion rate: %.1f%%\n", completion_rate * 100))

cat("\n")

cat(sprintf("Total participants removed for data quality: %d\n", n_completes - nrow(mydata)))
cat(sprintf("Total analytic sample: %d\n",nrow(mydata)))
cat("Participants per condition:\n")
print(n_conditions)

# Commenting and toxicity by post type, all post types (Figure S9) ----
  
  figure_S9 <- all.comments %>% # plot Figure S9
    left_join(ids, by = "participant_code") %>%
    mutate(post.type = case_when(post.type %in% c("Bonus", "Filler") ~ "Non-political",
                                 is.na(post.type) ~ "Own",
                                 TRUE ~ post.type),
           post.type = factor(post.type, levels = c("Civil","Uncivil","Non-political","Own")),
           condition = case_when(condition == "civil" ~ "(a) Civil",
                                 condition == "uncivil" ~ "(b) Uncivil")) %>%
    group_by(post.type, condition) %>%
    summarize(n = sum(!is.na(tox)),
              mean = mean(tox, na.rm = TRUE),
              se = sd(tox, na.rm = TRUE) / sqrt(n),
              .groups = "drop") %>%
    mutate(lower = mean - 1.96 * se,
           upper = mean + 1.96 * se,
           across(c(mean, se, lower, upper), ~ round(.x, 3))) 
  
  cat("\n==============================\n")
  cat("Figure S9: Comment toxicity by post type and condition\n")
  cat("==============================\n")
  print(figure_S9)

  ggplot(figure_S9, aes(x = post.type, y = mean, color = post.type)) +
    facet_wrap(condition~.) +
    geom_point(size = 4) +
    geom_errorbar(aes(ymin = lower, ymax = upper), linewidth = 1, width = 0) +
    scale_x_discrete("Post type") +
    scale_y_continuous("Toxicity", limits = c(0, 0.10),
                       breaks = seq(0,0.10, 0.025)) +
    theme_bw() +
    theme(plot.margin = margin(0.25, 0.25, 0, 0.25, "in"),
          axis.text = element_text(size = 16, color = "black"),
          axis.title = element_text(size = 16),
          axis.title.x = element_text(margin = margin(t=10)),
          axis.title.y = element_text(margin = margin(r=10)),
          legend.title = element_text(size = 14),
          legend.text = element_text(size = 12),
          legend.position = "none",
          panel.grid = element_blank(),
          strip.background = element_rect(fill = "white"),
          strip.text = element_text(size = 14, color = "black"),
          panel.grid.minor = element_blank())
  ggsave(height = 5, width = 10.25, "Figures/FigureS9_toxicity_by_post_type_by_condition.png")

# Toxicity over time (Figure S10) ----
set.seed(219)
figure_S10 <- all.comments %>%
  left_join(ids) %>%
  drop_na(tox) %>%  
  ggplot(aes(x = relativeTime_min, y = tox, color = condition)) +
  geom_smooth(method = "loess", se = TRUE, span = 0.5) +
  scale_x_continuous(limits = c(0,12), breaks = seq(0,12,1)) +
  labs(x = "Minutes since newsfeed start", y = "Toxicity") +
  scale_color_manual("",
                     values = brewer.pal(3, "PuOr")[c(1, 3)],
                     breaks = c("civil","uncivil"), 
                     labels = c("Civil","Uncivil")) +
  theme_bw() +
  theme(plot.margin = margin(0.25, 0.75, 0, 0.05, "in"),
        axis.text = element_text(size = 16, color = "black"),
        axis.title = element_text(size = 16),
        axis.title.x = element_text(margin = margin(t=10)),
        axis.title.y = element_text(margin = margin(r=10)),
        legend.title = element_text(size = 14),
        legend.text = element_text(size = 12),
        legend.position = "bottom",
        panel.grid = element_blank(),
        strip.background = element_rect(fill = "white"),
        strip.text = element_text(size = 14),
        panel.grid.minor = element_blank())
ggsave(height = 5, width = 6.25, "Figures/FigureS10_comment_toxicity_by_time_by_condition.png")


figure_S10 <- ggplot_build(figure_S10)
figure_S10 <- figure_S10$data[[1]] %>%
  dplyr::select(colour, x, y, ymin, ymax) %>%
  dplyr::arrange(colour, x)

cat("\n==============================\n")
cat("Figure S10: Comment toxicity across the 12-minute newsfeed session by condition\n")
cat("==============================\n")
print(figure_S10)


# End log file ----
end_time <- Sys.time()
run_time <- end_time - start_time
cat("\n==============================\n")
cat("END LOG FILE\n")
cat("==============================\n")
cat("Finished script at", as.character(end_time),
    "\nTotal run time:", as.numeric(run_time), "seconds")
sink()